% Schmidt (2017): journal article on the construction and dissemination
% workflow of the FOLK corpus, JLCL vol. 31, no. 1, pp. 127--154.
% The journal name is stored in the journal field (required for article
% entries); editor and address are ignored by classic styles.
@article{Schmidt2017,
  author   = {Schmidt, Thomas},
  title    = {Construction and dissemination of a corpus of spoken interaction - tools and workflows in the {FOLK} project},
  journal  = {Journal for language technology and computational linguistics (JLCL)},
  volume   = {31},
  number   = {1},
  pages    = {127--154},
  year     = {2017},
  editor   = {Kupietz, Marc and Geyken, Alexander},
  address  = {Berlin},
  issn     = {2190-6858},
  url      = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-62156},
  abstract = {This paper is about the workflow for construction and dissemination of FOLK (Forschungs- und Lehrkorpus Gesprochenes Deutsch -- Research and Teaching Corpus of Spoken German), a large corpus of authentic spoken interaction data, recorded on audio and video. Section 2 describes in detail the tools used in the individual steps of transcription, anonymization, orthographic normalization, lemmatization and POS tagging of the data, as well as some utilities used for corpus management. Section 3 deals with the DGD (Datenbank f{\"u}r Gesprochenes Deutsch - Database of Spoken German) as a tool for distributing completed data sets and making them available for qualitative and quantitative analysis. In section 4, some plans for further development are sketched.},
  language = {en},
}