% LREC 2020 conference paper describing the REDEWIEDERGABE corpus.
% The proceedings title lives in booktitle; address is the publisher's city.
@inproceedings{BrunnerEngelbergJannidisetal.2020,
  author    = {Brunner, Annelen and Engelberg, Stefan and Jannidis, Fotis and Tu, Ngoc Duyen Tanja and Weimer, Lukas},
  title     = {Corpus {REDEWIEDERGABE}},
  booktitle = {Proceedings of the 12th International Conference on Language Resources and Evaluation ({LREC}), May 11--16, 2020, Palais du Pharo, Marseille, France},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association},
  address   = {Paris},
  pages     = {803--812},
  year      = {2020},
  isbn      = {979-10-95546-34-4},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-98963},
  abstract  = {This article presents the corpus REDEWIEDERGABE, a German-language historical corpus with detailed annotations for speech, thought and writing representation (ST\&WR). With approximately 490,000 tokens, it is the largest resource of its kind. It can be used to answer literary and linguistic research questions and serve as training material for machine learning. This paper describes the composition of the corpus and the annotation structure, discusses some methodological decisions and gives basic statistics about the forms of ST\&WR found in this corpus.},
  language  = {en},
}