% Workshop paper on POS-annotating Twitter microtext with an extended STTS tagset (ACRH-3 proceedings).
% The proceedings title lives in booktitle (required for inproceedings entries), not in series.
% NOTE(review): year is 2016 although the booktitle dates the workshop to 12 December 2013;
% the year (and the citation key derived from it) is left unchanged here -- confirm against the publisher record.
@inproceedings{RehbeinVisserLestmann2016,
  author    = {Rehbein, Ines and Visser, Emiel and Lestmann, Nadine},
  title     = {Discussing best practices for the annotation of {Twitter} microtext},
  booktitle = {Proceedings of The Third Workshop on Annotation of Corpora for Research in the Humanities (ACRH-3). 12 December 2013. Sofia, Bulgaria},
  editor    = {Mambrini, Francesco and Passarotti, Marco and Sporleder, Caroline},
  publisher = {Bulgarian Academy of Sciences},
  address   = {Sofia},
  pages     = {73--84},
  year      = {2016},
  isbn      = {978-954-91700-5-4},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-56013},
  language  = {en},
  abstract  = {This paper contributes to the discussion on best practices for the syntactic analysis of non-canonical language, focusing on Twitter microtext. We present an annotation experiment where we test an existing POS tagset, the Stuttgart-T{\"u}bingen Tagset (STTS), with respect to its applicability for annotating new text from the social media, in particular from Twitter microblogs. We discuss different tagset extensions proposed in the literature and test our extended tagset on a set of 506 tweets (7,418 tokens) where we achieve an inter-annotator agreement for two human annotators in the range of 92.7 to 94.4 ($\kappa$). Our error analysis shows that especially the annotation of Twitter-specific phenomena such as hashtags and at-mentions causes disagreements between the human annotators. Following up on this, we provide a discussion of the different uses of the @- and \#-marker in Twitter and argue against analysing both on the POS level by means of an at-mention or hashtag label. Instead, we sketch a syntactic analysis which describes these phenomena by means of syntactic categories and grammatical functions.},
}