@comment{
  Conference paper from the proceedings of LREC 2008 (conference held 28--30 May 2008).
  The month field is set to the conference month; the exact publication date of the
  proceedings is not recorded here -- TODO confirm against the published volume.
  The citation key is a repository-generated identifier and is kept unchanged so that
  existing citations continue to resolve.
}
@inproceedings{93bc24ce6eed4d418d79a3c92e1c690b,
  author    = {Sharoff, Serge and Kopotev, Mikhail and Erjavec, Toma{\v{z}} and Feldman, Anna and Divjak, Dagmar},
  title     = {Designing and evaluating a {Russian} tagset},
  booktitle = {Proceedings of the 6th International Conference on Language Resources and Evaluation, {LREC} 2008},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {279--285},
  year      = {2008},
  month     = may,
  language  = {English},
  note      = {Conference date: 28--30 May 2008},
  abstract  = {This paper reports the principles behind designing a tagset to cover Russian morphosyntactic phenomena, modifications of the core tagset, and its evaluation. The tagset and associated morphosyntactic specifications are based on the MULTEXT-East framework, while the decisions in designing it were aimed at achieving a balance between parameters important for linguists and the possibility to detect and disambiguate them automatically. The final tagset contains about 600 tags and achieves about 95\% accuracy on the disambiguated portion of the Russian National Corpus. We have also produced a test set of tagging models and corpora that can be shared with other researchers.},
}