% Bubenhofer, Haupt & Schwinn: short paper describing a comparable, POS-tagged
% Wikipedia corpus built from wiki markup via XSLT.
% NOTE(review): this record is typed as an article but has no journal field;
% it carries series/number/publisher instead, which looks like a working-paper
% series item. Confirm the intended entry type (or supply the journal) against
% the repository record at the URL below before relying on the rendered output.
% NOTE(review): the series value looks like a raw library-catalogue string;
% left untouched here -- normalise once the canonical series title is confirmed.
@article{BubenhoferHauptSchwinn2016,
  author    = {Bubenhofer, Noah and Haupt, Stefanie and Schwinn, Horst},
  title     = {A comparable {Wikipedia} corpus: from wiki syntax to {POS} tagged {XML}},
  series    = {[Arbeiten zur Mehrsprachigkeit / B] Arbeiten zur Mehrsprachigkeit = Working papers in multilingualism / Sonderforschungsbereich 538 Mehrsprachigkeit 538, Universit{\"a}t Hamburg},
  number    = {96},
  publisher = {Universit{\"a}t Hamburg},
  address   = {Hamburg},
  year      = {2016},
  pages     = {141--144},
  issn      = {0176-599X},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-51897},
  language  = {en},
  abstract  = {To build a comparable Wikipedia corpus of German, French, Italian, Norwegian, Polish and Hungarian for contrastive grammar research, we used a set of XSLT stylesheets to transform the mediawiki annotations to XML. Furthermore, the data has been annotated with word class information using different taggers. The outcome is a corpus with rich meta data and linguistic annotation that can be used for multilingual research in various linguistic topics.},
}