% Bubenhofer, Haupt and Schwinn: short paper on building a comparable,
% POS-tagged Wikipedia corpus in XML for six languages (see abstract).
% NOTE(review): the journal value below looks like a raw library-catalogue
% string (bracketed series heading, parallel title, issuing body). It is kept
% verbatim here; confirm the canonical series title before normalising it.
@article{BubenhoferHauptSchwinn2016,
  author   = {Bubenhofer, Noah and Haupt, Stefanie and Schwinn, Horst},
  title    = {A comparable {Wikipedia} corpus: from wiki syntax to {POS} tagged {XML}},
  journal  = {[Arbeiten zur Mehrsprachigkeit / B] Arbeiten zur Mehrsprachigkeit = Working papers in multilingualism / Sonderforschungsbereich 538 Mehrsprachigkeit 538, Universit{\"a}t Hamburg},
  number   = {96},
  issn     = {0176-599X},
  pages    = {141--144},
  year     = {2016},
  abstract = {To build a comparable Wikipedia corpus of German, French, Italian, Norwegian, Polish and Hungarian for contrastive grammar research, we used a set of XSLT stylesheets to transform the mediawiki annotations to XML. Furthermore, the data has been annotated with word class information using different taggers. The outcome is a corpus with rich meta data and linguistic annotation that can be used for multilingual research in various linguistic topics.},
  subject  = {Korpus},
  language = {en},
}