% Rehbein and van Genabith: comparison of the TIGER and TueBa-D/Z German treebanks
% (paper in the TLT '07 workshop proceedings).
% NOTE(review): year = 2017 disagrees with the workshop date (December 2007) given in
% booktitle; it may be the repository record date rather than the publication year -- confirm.
@inproceedings{RehbeinvanGenabith2017,
  author    = {Rehbein, Ines and van Genabith, Josef},
  title     = {Why is it so difficult to compare treebanks? {TIGER} and {T{\"u}Ba-D/Z} revisited},
  booktitle = {The Sixth International Workshop on Treebanks and Linguistic Theories ({TLT} '07). Bergen, Norway. December 7--8, 2007},
  editor    = {De Smedt, Koenraad and Haji{\v{c}}, Jan and K{\"u}bler, Sandra},
  publisher = {Northern European Association for Language Technology},
  address   = {Tartu},
  pages     = {115--126},
  year      = {2017},
  issn      = {1736-6305},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-57822},
  language  = {en},
  abstract  = {This paper is a contribution to the ongoing discussion on treebank annotation schemes and their impact on PCFG parsing results. We provide a thorough comparison of two German treebanks: the TIGER treebank and the T{\"u}Ba-D/Z. We use simple statistics on sentence length and vocabulary size, and more refined methods such as perplexity and its correlation with PCFG parsing results, as well as a Principal Components Analysis. Finally we present a qualitative evaluation of a set of 100 sentences from the T{\"u}Ba-D/Z, manually annotated in the TIGER as well as in the T{\"u}Ba-D/Z annotation scheme, and show that even the existence of a parallel subcorpus does not support a straightforward and easy comparison of both annotation schemes.},
}