% Conference paper in the KONVENS 2016 proceedings (13th Conference on
% Natural Language Processing). The proceedings title is stored in
% booktitle, which standard styles require for this entry type.
% Names use the unambiguous "Last, First" form; acronyms in the title are
% brace-protected so sentence-casing styles keep their capitalisation.
@inproceedings{LuengenBeisswengerEhrhardtetal.2016,
  author    = {L{\"u}ngen, Harald and Bei{\ss}wenger, Michael and Ehrhardt, Eric and Herold, Axel and Storrer, Angelika},
  title     = {Integrating corpora of computer-mediated communication in {CLARIN-D}: Results from the curation project {ChatCorpus2CLARIN}},
  booktitle = {Proceedings of the 13th Conference on Natural Language Processing ({KONVENS})},
  editor    = {Dipper, Stefanie and Neubarth, Friedrich and Zinsmeister, Heike},
  publisher = {Sprachwissenschaftliches Institut, Ruhr-Universit{\"a}t Bochum},
  address   = {Bochum},
  pages     = {156--164},
  year      = {2016},
  issn      = {2190-0949},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-55743},
  abstract  = {We introduce our pipeline to integrate CMC and SM corpora into the CLARIN-D corpus infrastructure. The pipeline was developed by transforming an existing CMC corpus, the Dortmund Chat Corpus, into a resource conforming to current technical and legal standards. We describe how the resource has been prepared and restructured in terms of TEI encoding, linguistic annotations, and anonymisation. The output is a CLARIN-conformant resource integrated in the CLARIN-D research infrastructure.},
  language  = {en},
}