% Chapter in an edited volume (Schmidt and Woerner, eds.), published by Benjamins.
% The containing book's title lives in booktitle, which incollection requires.
% NOTE(review): the DOI prefix "hsm.14" suggests this book is volume 14 of a
%   publisher series; the series name and volume are not recorded here -- confirm
%   against the publisher's record before adding series/volume fields.
% NOTE(review): year 2020 may be a repository deposit date rather than the
%   original publication year -- verify; the citation key is kept as-is.
@incollection{HedelandSchmidt2020,
  author    = {Hedeland, Hanna and Schmidt, Thomas},
  title     = {Technological and methodological challenges in creating, annotating and sharing a learner corpus of spoken {German}},
  booktitle = {Multilingual Corpora and Multilingual Corpus Analysis},
  editor    = {Schmidt, Thomas and W{\"o}rner, Kai},
  publisher = {Benjamins},
  address   = {Amsterdam},
  pages     = {25--46},
  year      = {2020},
  isbn      = {9789027219343},
  doi       = {10.1075/hsm.14.04hed},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-97229},
  abstract  = {This article discusses questions concerning the creation, annotation and sharing of spoken language corpora. We use the Hamburg Map Task Corpus (HAMATAC), a small corpus in which advanced learners of German were recorded solving a map task, as an example to illustrate our main points. We first give an overview of the corpus creation and annotation process including recording, metadata documentation, transcription and semi-automatic annotation of the data. We then discuss the manual annotation of disfluencies as an example case in which many of the typical and challenging problems for data reuse -- in particular the reliability of interpretative annotations -- are revealed.},
  language  = {en},
}