% Conference paper from the KONVENS 2016 proceedings, issued as no. 16 of the
% series Bochumer Linguistische Arbeitsberichte.
% The citation key is plain ASCII: the earlier key contained braces, an accent
% macro and a raw non-ASCII letter, which BibTeX cannot read as a key. Update
% any citation that still uses the old spelling.
@inproceedings{LuengenBeisswengerEhrhardtetal.2016,
  author    = {L{\"u}ngen, Harald and Bei{\ss}wenger, Michael and Ehrhardt, Eric and Herold, Axel and Storrer, Angelika},
  title     = {Integrating corpora of computer-mediated communication in {CLARIN-D}: Results from the curation project {ChatCorpus2CLARIN}},
  booktitle = {Proceedings of the 13th Conference on Natural Language Processing ({KONVENS})},
  editor    = {Dipper, Stefanie and Neubarth, Friedrich and Zinsmeister, Heike},
  series    = {Bochumer Linguistische Arbeitsberichte},
  number    = {16},
  pages     = {156--164},
  year      = {2016},
  issn      = {2190-0949},
  url       = {https://www.linguistics.ruhr-uni-bochum.de/bla/},
  abstract  = {We introduce our pipeline to integrate CMC and SM corpora into the CLARIN-D corpus infrastructure. The pipeline was developed by transforming an existing CMC corpus, the Dortmund Chat Corpus, into a resource conforming to current technical and legal standards. We describe how the resource has been prepared and restructured in terms of TEI encoding, linguistic annotations, and anonymisation. The output is a CLARIN-conformant resource integrated in the CLARIN-D research infrastructure.},
  subject   = {Deutsch},
  language  = {en},
}