% Sirts (2012): conference paper, Baltic Conference on Human Language Technologies (5th : 2012), published by IOS Press.
% The word Estonian is brace-protected in the title so sentence-casing styles keep its capital letter.
@inproceedings{032a9cb8c0614473b9d967babe440279,
  author    = {Sirts, Kairit},
  title     = {Noisy-channel spelling correction models for {Estonian} learner language corpus lemmatisation},
  booktitle = {Human language technologies},
  editor    = {Tavast, Arvi and Muischnek, Kadri and Koit, Mare},
  series    = {Frontiers in artificial intelligence and applications},
  publisher = {IOS Press},
  address   = {Amsterdam, Netherlands},
  year      = {2012},
  pages     = {213--220},
  doi       = {10.3233/978-1-61499-133-5-213},
  isbn      = {9781614991328},
  language  = {English},
  keywords  = {spelling correction, learner languages analysis, lemmatisation},
  abstract  = {Morphological analysis is an important task in Estonian learner language studies that gives information about the words and forms used by the learners. Because of the spelling errors frequently occurring in language learner texts, these texts should undergo some error correction step before applying the conventional morphological analysis tools because the morphological analyser fails to find the correct analysis for the misspelled words. In this paper we compare several different spelling correction models with the aim of improving the lemmatisation accuracy of learner language texts. Experiments show that the simplest non-word noisy-channel spelling correction model with a disambiguation model applied on top of the morphological analyser output performs the best while some of the more complicated models even fail to beat the baseline that does not include any spelling correction.},
  note      = {Baltic Conference on Human Language Technologies (5th : 2012) ; Conference date: 04-10-2012 Through 05-10-2012},
}