BibTeX
% Schulz & Keller (2016): language identification and POS tagging for Latin-Middle English mixed text.
@InProceedings{Schulz2016b,
  author    = {Schulz, Sarah and Keller, Mareike},
  title     = {Code-Switching Ubique Est -- Language Identification and Part-of-Speech Tagging for Historical Mixed Text},
  booktitle = {Proceedings of the 10th {SIGHUM} Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities},
  year      = {2016},
  month     = aug,
  address   = {Berlin, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {43--51},
  abstract  = {In this paper, we describe the development of a language identification system and a part-of-speech tagger for Latin-Middle English mixed text. To this end, we annotate data with language IDs and Universal POS tags (Petrov et al., 2012). As a classifier, we train a conditional random field classifier for both sub-tasks, including features generated by the TreeTagger models of both languages. The focus lies on both a general and a task-specific evaluation. Moreover, we describe our effort concerning beyond proof-of-concept implementation of tools and towards a more task-oriented approach, showing how to apply our techniques in the context of Humanities research.},
}