@article{KlosaKupietzLuengen2012, author = {Klosa, Annette and Kupietz, Marc and L{\"u}ngen, Harald}, title = {Zum Nutzen von Korpusauszeichnungen f{\"u}r die Lexikographie}, series = {Lexicographica : international annual for lexicography}, volume = {28}, publisher = {de Gruyter}, address = {Berlin}, issn = {0175-6206}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-8554}, pages = {71 -- 97}, year = {2012}, language = {de} } @article{KunzeLemnitzerLuengenetal.2007, author = {Kunze, Claudia and Lemnitzer, Lothar and L{\"u}ngen, Harald and Storrer, Angelika}, title = {Repr{\"a}sentation und Verkn{\"u}pfung allgemeinsprachlicher und terminologischer Wortnetze in OWL}, series = {Zeitschrift f{\"u}r Sprachwissenschaft : Organ der Deutschen Gesellschaft f{\"u}r Sprachwissenschaft}, volume = {26}, number = {2}, publisher = {de Gruyter}, address = {Berlin}, issn = {1613-3706}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-8621}, pages = {267 -- 290}, year = {2007}, abstract = {This paper describes an approach to modelling a general-language wordnet, GermaNet, and a domain-specific wordnet, TermNet, in the web ontology language OWL. While the modelling process for GermaNet adopts relevant recommendations with respect to the English Princeton WordNet, for Term-Net an alternative modelling concept is developed that considers the special characteristics of domain-specific terminologies. 
We present a proposal for linking a general-language wordnet and a terminological wordnet within the framework of OWL and on this basis discuss problems and alternative modelling approaches.}, language = {de} } @incollection{LobinLuengenHilbertetal.2011, author = {Lobin, Henning and L{\"u}ngen, Harald and Hilbert, Mirco and B{\"a}renf{\"a}nger, Maja}, title = {Processing Text-Technological Resources in Discourse Parsing}, series = {Modeling, Learning, and Processing of Text-Technological Data Structures}, editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas}, publisher = {Springer}, address = {Berlin/Heidelberg}, isbn = {978-3-642-22612-0}, doi = {10.1007/978-3-642-22613-7_3}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-48317}, series = {Studies in Computational Intelligence}, number = {370}, pages = {35 -- 58}, year = {2011}, abstract = {Discourse parsing of complex text types such as scientific research articles requires the analysis of an input document on linguistic and structural levels that go beyond traditionally employed lexical discourse markers. This chapter describes a text-technological approach to discourse parsing. Discourse parsing with the aim of providing a discourse structure is seen as the addition of a new annotation layer for input documents marked up on several linguistic annotation levels. The discourse parser generates discourse structures according to the Rhetorical Structure Theory. An overview of the knowledge sources and components for parsing scientific journal articles is given. The parser's core consists of cascaded applications of the GAP, a Generic Annotation Parser. 
Details of the chart parsing algorithm are provided, as well as a short evaluation in terms of comparisons with reference annotations from our corpus and with recently developed systems with a similar task.}, language = {en} } @incollection{LuengenBeisswengerSelzametal.2011, author = {L{\"u}ngen, Harald and Beißwenger, Michael and Selzam, Bianca and Storrer, Angelika}, title = {Modelling and Processing Wordnets in OWL}, series = {Modeling, Learning, and Processing of Text-Technological Data Structures}, editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas}, publisher = {Springer}, address = {Berlin/Heidelberg}, isbn = {978-3-642-22612-0}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-48322}, series = {Studies in Computational Intelligence}, number = {370}, pages = {347 -- 376}, year = {2011}, abstract = {In this contribution, we discuss and compare alternative options of modelling the entities and relations of wordnet-like resources in the Web Ontology Language OWL. Based on different modelling options, we developed three models of representing wordnets in OWL, i.e. the instance model, the class model, and the metaclass model. These OWL models mainly differ with respect to the ontological status of lexical units (word senses) and the synsets. While in the instance model lexical units and synsets are represented as individuals, in the class model they are represented as classes; both model types can be encoded in the dialect OWL DL. As a third alternative, we developed a metaclass model in OWL FULL, in which lexical units and synsets are defined as metaclasses, the individuals of which are classes themselves. 
We apply the three OWL models to each of three wordnet-style resources: (1) a subset of the German wordnet GermaNet, (2) the wordnet-style domain ontology TermNet, and (3) GermaTermNet, in which TermNet technical terms and GermaNet synsets are connected by means of a set of "plug-in" relations. We report on the results of several experiments in which we evaluated the performance of querying and processing these different models: (1) A comparison of all three OWL models (class, instance, and metaclass model) of TermNet in the context of automatic text-to-hypertext conversion, (2) an investigation of the potential of the GermaTermNet resource by the example of a wordnet-based semantic relatedness calculation.}, language = {en} } @incollection{LuengenHebborn2012, author = {L{\"u}ngen, Harald and Hebborn, Mariana}, title = {Linguistische Annotationen f{\"u}r die Analyse von Gliederungsstrukturen wissenschaftlicher Texte}, series = {Kulturwissenschaften Digital. Neue Forschungsfragen und Methoden}, editor = {Klawitter, Jana and Lobin, Henning and Schmidt, Torben}, publisher = {Campus}, address = {Frankfurt am Main}, isbn = {978-3-593-41287-0}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47959}, pages = {155 -- 176}, year = {2012}, language = {de} } @article{BaerenfaengerBinderLobinetal.2011, author = {B{\"a}renf{\"a}nger, Maja and Binder, Frank and Lobin, Henning and L{\"u}ngen, Harald and St{\"u}hrenberg, Maik}, title = {Editorial}, series = {Journal for Language Technology and Computational Linguistics}, volume = {26}, number = {1}, publisher = {GSCL}, address = {Regensburg}, issn = {2190-6858}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47970}, pages = {V -- V}, year = {2011}, language = {de} } @article{BinderLuengenLobin2011, author = {Binder, Frank and L{\"u}ngen, Harald and Lobin, Henning}, title = {Sprachressourcen in der Lehre - Erfahrungen, Einsatzszenarien, Nutzerw{\"u}nsche}, series = {Journal for Language Technology and Computational 
Linguistics}, volume = {26}, number = {1}, publisher = {GSCL}, address = {Regensburg}, issn = {2190-6858}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47980}, pages = {53 -- 65}, year = {2011}, language = {de} } @incollection{LuengenBaerenfaengerHilbertetal.2010, author = {L{\"u}ngen, Harald and B{\"a}renf{\"a}nger, Maja and Hilbert, Mirco and Lobin, Henning and Pusk{\'a}s, Csilla}, title = {Discourse Relations and Document Structure}, series = {Linguistic Modeling of Information and Markup Languages. Contributions to Language Technology}, editor = {Witt, Andreas and Metzing, Dieter}, publisher = {Springer}, address = {Dordrecht}, isbn = {978-90-481-3330-7}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-48005}, series = {Text, Speech and Language Technology}, number = {41}, pages = {97 -- 123}, year = {2010}, abstract = {This chapter addresses the requirements and linguistic foundations of automatic relational discourse analysis of complex text types such as scientific journal articles. It is argued that besides lexical and grammatical discourse markers, which have traditionally been employed in discourse parsing, cues derived from the logical and generical document structure and the thematic structure of a text must be taken into account. An approach to modelling such types of linguistic information in terms of XML-based multi-layer annotations and to a text-technological representation of additional knowledge sources is presented. By means of quantitative and qualitative corpus analyses, cues and constraints for automatic discourse analysis can be derived. Furthermore, the proposed representations are used as the input sources for discourse parsing. 
A short overview of the projected parsing architecture is given.}, language = {en} } @inproceedings{BayerlLuengenGoeckeetal.2003, author = {Bayerl, Petra Saskia and L{\"u}ngen, Harald and Goecke, Daniela and Witt, Andreas and Naber, Daniel}, title = {Methods for the semantic analysis of document markup}, series = {Proceedings of the ACM Symposium on Document Engineering (DocEng 2003)}, editor = {Roisin, C{\'e}cile and Munson, Ethan and Vanoirbeek, Christine}, publisher = {ACM}, address = {New York}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-48014}, pages = {161 -- 170}, year = {2003}, abstract = {We present an approach on how to investigate what kind of semantic information is regularly associated with the structural markup of scientific articles. This approach addresses the need for an explicit formal description of the semantics of text-oriented XML-documents. The domain of our investigation is a corpus of scientific articles from psychology and linguistics from both English and German online available journals. For our analyses, we provide XML-markup representing two kinds of semantic levels: the thematic level (i.e. topics in the text world that the article is about) and the functional or rhetorical level. Our hypothesis is that these semantic levels correlate with the articles' document structure also represented in XML. Articles have been annotated with the appropriate information. Each of the three informational levels is modelled in a separate XML document, since in our domain, the different description levels might conflict so that it is impossible to model them within a single XML document. For comparing and mining the resulting multi-layered XML annotations of one article, a Prolog-based approach is used. It focusses on the comparison of XML markup that is distributed among different documents. Prolog predicates have been defined for inferring relations between levels of information that are modelled in separate XML documents. 
We demonstrate how the Prolog tool is applied in our corpus analyses.}, language = {en} } @incollection{LuengenKeibel2013, author = {L{\"u}ngen, Harald and Keibel, Holger}, title = {Zur Erstellung und Interpretation der Zeitverlaufsgrafiken}, series = {Neuer Wortschatz. Neologismen im Deutschen 2001-2010. Band 2: kiten - Z}, editor = {Steffens, Doris and al-Wadi, Doris}, edition = {1. Auflage}, publisher = {Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, isbn = {978-3-937241-43-2}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47870}, pages = {561 -- 567}, year = {2013}, language = {de} } @incollection{LuengenKeibel2014, author = {L{\"u}ngen, Harald and Keibel, Holger}, title = {Zur Erstellung und Interpretation der Zeitverlaufsgrafiken}, series = {Neuer Wortschatz. Neologismen im Deutschen 2001-2010. Band 2: kiten - Z}, editor = {Steffens, Doris and al-Wadi, Doris}, edition = {2., durchgesehene Auflage}, publisher = {Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, isbn = {978-3-937241-43-2}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47888}, pages = {561 -- 567}, year = {2014}, language = {de} } @incollection{LuengenKeibel2015, author = {L{\"u}ngen, Harald and Keibel, Holger}, title = {Zur Erstellung und Interpretation der Zeitverlaufsgrafiken}, series = {Neuer Wortschatz. Neologismen im Deutschen 2001-2010. Band 2: kiten - Z}, editor = {Steffens, Doris and al-Wadi, Doris}, edition = {3., durchgesehene Auflage}, publisher = {Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, isbn = {978-3-937241-43-2}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47893}, pages = {561 -- 567}, year = {2015}, language = {de} } @inproceedings{SperbergMcQueenSchonefeldKupietzetal.2013, author = {Sperberg-McQueen, Christopher M. and Schonefeld, Oliver and Kupietz, Marc and L{\"u}ngen, Harald and Witt, Andreas}, title = {Igel: Comparing document grammars using XQuery}, series = {Proceedings of Balisage. 
The Markup Conference 2013}, issn = {1947-2609}, doi = {10.4242/BalisageVol10.Schonefeld01}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-47919}, series = {Balisage Series on Markup Technologies}, number = {10}, pages = {ungez{\"a}hlte Seiten}, year = {2013}, abstract = {Igel is a small XQuery-based web application for examining a collection of document grammars; in particular, for comparing related document grammars to get a better overview of their differences and similarities. In its initial form, Igel reads only DTDs and provides only simple lists of constructs in them (elements, attributes, notations, parameter entities). Our continuing work is aimed at making Igel provide more sophisticated and useful information about document grammars and building the application into a useful tool for the analysis (and the maintenance!) of families of related document grammars.}, language = {en} } @inproceedings{BeisswengerChanierChiarietal.2016, author = {Beißwenger, Michael and Chanier, Thierry and Chiari, Isabella and Erjavec, Tomaž and Fišer, Darja and Herold, Axel and Ljubešić, Nikola and L{\"u}ngen, Harald and Poudat, C{\'e}line and Stemle, Egon W. and Storrer, Angelika and Wigham, Ciara}, title = {Integrating corpora of computer-mediated communication into the language resources landscape: Initiatives and best practices from French, German, Italian and Slovenian projects}, series = {Proceedings of the 5th CLARIN Annual Conference. Aix-en-Provence, France. 26-28 October, 2016}, editor = {Borin, Lars}, publisher = {CLARIN}, address = {Utrecht}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-58053}, pages = {5}, year = {2016}, abstract = {The paper presents best practices and results from projects in four countries dedicated to the creation of corpora of computer-mediated communication and social media interactions (CMC). 
Even though there are still many open issues related to building and annotating corpora of that type, there already exists a range of accessible solutions which have been tested in projects and which may serve as a starting point for a more precise discussion of how future standards for CMC corpora may (and should) be shaped like.}, language = {en} } @inproceedings{BeisswengerEhrhardtHeroldetal.2016, author = {Beißwenger, Michael and Ehrhardt, Eric and Herold, Axel and L{\"u}ngen, Harald and Storrer, Angelika}, title = {Converting and Representing Social Media Corpora into TEI: Schema and best practices from CLARIN-D}, series = {TEI Conference and Members' Meeting 2016. Book of Abstracts}, editor = {Resch, Claudia and Hannesschl{\"a}ger, Vanessa and Wissik, Tanja}, publisher = {Austrian Centre for Digital Humanities, Austrian Academy of Sciences}, address = {Wien}, isbn = {978-3-200-04689-4}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-55736}, pages = {39 -- 41}, year = {2016}, abstract = {The paper presents results from a curation project within CLARIN-D, in which an existing 1M-word corpus of German chat communication has been integrated into the DeReKo and DWDS corpus infrastructures of the CLARIN-D centres at the Institute for the German Language (IDS, Mannheim) and at the Berlin-Brandenburg Academy of Sciences (BBAW, Berlin). 
The focus is on the solutions developed for converting and representing the corpus in a TEI format.}, language = {en} } @incollection{LuengenBeisswengerEhrhardtetal.2016, author = {L{\"u}ngen, Harald and Beißwenger, Michael and Ehrhardt, Eric and Herold, Axel and Storrer, Angelika}, title = {Integrating corpora of computer-mediated communication in CLARIN-D: Results from the curation project ChatCorpus2CLARIN}, series = {Proceedings of the 13th Conference on Natural Language Processing (KONVENS)}, editor = {Dipper, Stefanie and Neubarth, Friedrich and Zinsmeister, Heike}, publisher = {Sprachwissenschaftliches Institut, Ruhr-Universit{\"a}t Bochum}, address = {Bochum}, issn = {2190-0949}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-55743}, series = {Bochumer Linguistische Arbeitsberichte}, number = {16}, pages = {156 -- 164}, year = {2016}, abstract = {We introduce our pipeline to integrate CMC and SM corpora into the CLARIN-D corpus infrastructure. The pipeline was developed by transforming an existing CMC corpus, the Dortmund Chat Corpus, into a resource conforming to current technical and legal standards. We describe how the resource has been prepared and restructured in terms of TEI encoding, linguistic annotations, and anonymisation. The output is a CLARIN-conformant resource integrated in the CLARIN-D research infrastructure.}, language = {en} } @inproceedings{BeisswengerHeroldLuengenetal.2016, author = {Beißwenger, Michael and Herold, Axel and L{\"u}ngen, Harald and Storrer, Angelika}, title = {Das Dortmunder Chat-Korpus in CLARIN-D: Modellierung und Mehrwerte}, series = {DHd 2016. Modellierung - Vernetzung - Visualisierung. Die Digital Humanities als f{\"a}cher{\"u}bergreifendes Forschungsparadigma. 
Konferenzabstracts}, publisher = {nisaba}, address = {Duisburg}, isbn = {978-3-941379-05-3}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-55788}, pages = {274 -- 277}, year = {2016}, language = {de} } @inproceedings{BeisswengerEhrhardtHeroldetal.2016a, author = {Beißwenger, Michael and Ehrhardt, Eric and Herold, Axel and L{\"u}ngen, Harald and Storrer, Angelika}, title = {(Best) Practices for Annotating and Representing CMC and Social Media Corpora in CLARIN-D}, series = {Proceedings of the 4th Conference on CMC and Social Media Corpora for the Humanities}, editor = {Fišer, Darja and Beißwenger, Michael}, publisher = {Academic Publishing Division of the Faculty of Arts of the University of Ljubljana}, address = {Ljubljana}, isbn = {978-961-237-859-2}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-55810}, pages = {7 -- 11}, year = {2016}, abstract = {The paper reports the results of the curation project ChatCorpus2CLARIN. The goal of the project was to develop a workflow and resources for the integration of an existing chat corpus into the CLARIN-D research infrastructure for language resources and tools in the Humanities and the Social Sciences (http://clarin-d.de). The paper presents an overview of the resources and practices developed in the project, describes the added value of the resource after its integration and discusses, as an outlook, to what extent these practices can be considered best practices which may be useful for the annotation and representation of other CMC and social media corpora.}, language = {en} } @inproceedings{BeisswengerChanierChiarietal.2016a, author = {Beißwenger, Michael and Chanier, Thierry and Chiari, Isabella and Erjavec, Tomaž and Fišer, Darja and Herold, Axel and Ljubešić, Nikola and L{\"u}ngen, Harald and Poudat, C{\'e}line and Stemle, Egon W. 
and Storrer, Angelika and Wigham, Ciara}, title = {Integrating corpora of computer-mediated communication into the language resources landscape: Initiatives and best practices from French, German, Italian and Slovenian projects}, series = {Proceedings CLARIN Annual Conference 2016}, editor = {De Smedt, Koenraad and Odijk, Jan and M{\"o}rth, Karlheinz}, publisher = {Clarin}, address = {Aix-en-Provence}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-55836}, pages = {5}, year = {2016}, abstract = {The paper presents best practices and results from projects in four countries dedicated to the creation of corpora of computer-mediated communication and social media interactions (CMC). Even though there are still many open issues related to building and annotating corpora of that type, there already exists a range of accessible solutions which have been tested in projects and which may serve as a starting point for a more precise discussion of how future standards for CMC corpora may (and should) be shaped like.}, language = {en} } @article{WittGoeckeSasakietal.2005, author = {Witt, Andreas and Goecke, Daniela and Sasaki, Felix and L{\"u}ngen, Harald}, title = {Unification of XML Documents with Concurrent Markup}, series = {Literary and Linguistic Computing}, volume = {20}, number = {1}, publisher = {Oxford University Press}, address = {Oxford}, issn = {1477-4615}, doi = {10.1093/llc/fqh046}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45269}, pages = {103 -- 116}, year = {2005}, abstract = {An approach to the unification of XML (Extensible Markup Language) documents with identical textual content and concurrent markup in the framework of XML-based multi-layer annotation is introduced. A Prolog program allows the possible relationships between element instances on two annotation layers that share PCDATA to be explored and also the computing of a target node hierarchy for a well-formed, merged XML document. 
Special attention is paid to identity conflicts between element instances, for which a default solution that takes into account metarelations that hold between element types on the different annotation layers is provided. In addition, rules can be specified by a user to prescribe how identity conflicts should be solved for certain element types.}, language = {en} } @inproceedings{GoeckeLuengenSasakietal.2005, author = {Goecke, Daniela and L{\"u}ngen, Harald and Sasaki, Felix and Witt, Andreas and Farrar, Scott}, title = {GOLD and Discourse: Domain- and Community-Specific Extensions}, series = {Proceedings of the E-MELD Workshop on Morphosyntactic Annotation and Terminology: Linguistic Ontologies and Data Categories for Language Resources}, publisher = {E-MELD}, address = {Boston}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45321}, pages = {8}, year = {2005}, language = {en} } @incollection{GoeckeLuengenMetzingetal.2010, author = {Goecke, Daniela and L{\"u}ngen, Harald and Metzing, Dieter and St{\"u}hrenberg, Maik and Witt, Andreas}, title = {Different Views on Markup}, series = {Linguistic Modeling of Information and Markup Languages. Contributions to Language Technology}, editor = {Witt, Andreas and Metzing, Dieter}, publisher = {Springer}, address = {Dordrecht}, isbn = {978-90-481-3330-7}, doi = {10.1007/978-90-481-3331-4_1}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-44966}, series = {Text, Speech and Language Technology}, number = {41}, pages = {1 -- 22}, year = {2010}, abstract = {In this chapter, two different ways of grouping information represented in document markup are examined: annotation levels, referring to conceptual levels of description, and annotation layers, referring to the technical realisation of markup using e.g. document grammars. In many current XML annotation projects, multiple levels are integrated into one layer, often leading to the problem of having to deal with overlapping hierarchies. 
As a solution, we propose a framework for XML-based multiple, independent XML annotation layers for one text, based on an abstract representation of XML documents with logical predicates. Two realisations of the abstract representation are presented, a Prolog fact base format together with an application architecture, and a specification for XML native databases. We conclude with a discussion of projects that have currently adopted this framework.}, language = {en} } @inproceedings{WittLuengenSasakietal.2004, author = {Witt, Andreas and L{\"u}ngen, Harald and Sasaki, Felix and Goecke, Daniela}, title = {Unification of XML Documents with Concurrent Markup}, series = {ALLCACH2004, Joint Conference of the ALLC and ACH, G{\"o}teborg}, publisher = {University of G{\"o}teborg}, address = {G{\"o}teborg}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45391}, pages = {5}, year = {2004}, language = {en} } @incollection{BelicaKupietzWittetal.2009, author = {Belica, Cyril and Kupietz, Marc and Witt, Andreas and L{\"u}ngen, Harald}, title = {The Morphosyntactic Annotation of DeReKo: Interpretation, Opportunities, and Pitfalls}, series = {Grammatik und Korpora 2009. Dritte Internationale Konferenz. Mannheim, 22.-24.9.2009}, editor = {Konopka, Marek and Kubczak, Jacqueline and Mair, Christian and Šticha, František and Waßner, Ulrich Hermann}, publisher = {Narr}, address = {T{\"u}bingen}, isbn = {978-3-8233-6648-5}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-44890}, series = {Korpuslinguistik und interdisziplin{\"a}re Perspektiven auf Sprache}, number = {1}, pages = {451 -- 469}, year = {2009}, abstract = {The paper discusses from various angles the morphosyntactic annotation of DeReKo, the Archive of General Reference Corpora of Contemporary Written German at the Institut f{\"u}r Deutsche Sprache (IDS), Mannheim. The paper is divided into two parts. The first part covers the practical and technical aspects of this endeavor. 
We present results from a recent evaluation of tools for the annotation of German text resources that have been applied to DeReKo. These tools include commercial products, especially Xerox' Finite State Tools and the Machinese products developed by the Finnish company Connexor Oy, as well as software for which academic licenses are available free of charge for academic institutions, e.g. Helmut Schmid's Tree Tagger. The second part focuses on the linguistic interpretability of the corpus annotations and more general methodological considerations concerning scientifically sound empirical linguistic research. The main challenge here is that unlike the texts themselves, the morphosyntactic annotations of DeReKo do not have the status of observed data; instead they constitute a theory and implementation-dependent interpretation. In addition, because of the enormous size of DeReKo, a systematic manual verification of the automatic annotations is not feasible. In consequence, the expected degree of inaccuracy is very high, particularly wherever linguistically challenging phenomena, such as lexical or grammatical variation, are concerned. Given these facts, a researcher using the annotations blindly will run the risk of not actually studying the language but rather the annotation tool or the theory behind it. 
The paper gives an overview of possible pitfalls and ways to circumvent them and discusses the opportunities offered by using annotations in corpus-based and corpus-driven grammatical research against the background of a scientifically sound methodology.}, language = {en} } @incollection{MehlerKuehnbergerLobinetal.2011, author = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas}, title = {Introduction: Modeling, Learning and Processing of Text-Technological Data Structures}, series = {Modeling, Learning, and Processing of Text-Technological Data Structures}, editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas}, publisher = {Springer}, address = {Berlin/Heidelberg}, isbn = {978-3-642-22612-0}, doi = {10.1007/978-3-642-22613-7_1}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45067}, series = {Studies in Computational Intelligence}, number = {370}, pages = {1 -- 11}, year = {2011}, abstract = {Researchers in many disciplines, sometimes working in close cooperation, have been concerned with modeling textual data in order to account for texts as the prime information unit of written communication. The list of disciplines includes computer science and linguistics as well as more specialized disciplines like computational linguistics and text technology. What many of these efforts have in common is the aim to model textual data by means of abstract data types or data structures that support at least the semi-automatic processing of texts in any area of written communication.}, language = {en} } @inproceedings{LuengenWitt2008, author = {L{\"u}ngen, Harald and Witt, Andreas}, title = {Multi-Dimensional Markup: N-way relations as a generalisation over possible relations between annotation layers}, series = {Digital Humanities 2008. 
Book of Abstracts}, editor = {Opas-H{\"a}nninen, Lisa Lena and Jokelainen, Mikko and Juuso, Ilkka and Sepp{\"a}nen, Tapio}, publisher = {University of Oulu}, address = {Oulu}, isbn = {978-951-42-8838-8}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45104}, pages = {254 -- 255}, year = {2008}, language = {en} } @inproceedings{WittLuengenGibbon2000, author = {Witt, Andreas and L{\"u}ngen, Harald and Gibbon, Dafydd}, title = {Enhancing speech corpus resources with multiple lexical tag layers}, series = {Proceedings of the 2nd International Conference on Language Resources and Evaluation (LREC-2000). Athen, Griechenland}, publisher = {European Language Resources Association (ELRA)}, address = {Paris}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45517}, pages = {5}, year = {2000}, abstract = {We describe a general two-stage procedure for re-using a custom corpus for spoken language system development involving a transformation from character-based markup to XML, and DSSSL stylesheet-driven XML markup enhancement with multiple lexical tag trees. The procedure was used to generate a fully tagged corpus; alternatively with greater economy of computing resources, it can be employed as a parametrised 'tagging on demand' filter. The implementation will shortly be released as a public resource together with the corpus (German spoken dialogue, about 500k word form tokens) and lexicon (about 75k word form types).}, language = {en} } @incollection{LuengenPuskasBaerenfaengeretal.2006, author = {L{\"u}ngen, Harald and Pusk{\'a}s, Csilla and B{\"a}renf{\"a}nger, Maja and Hilbert, Mirco and Lobin, Henning}, title = {Discourse segmentation of German written texts}, series = {Advances in natural language processing. 
5th International Conference on NLP FinTAL 2006 Turku, Finnland, August 23-25}, editor = {Pahikkala, Tapio and Pyysalo, Sampo and Ginter, Filip and Salakoski, Tapio}, publisher = {Springer-Verlag}, address = {Berlin [u.a.]}, isbn = {978-3-540-37334-6}, doi = {10.1007/11816508_26}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-23}, pages = {245 -- 256}, year = {2006}, abstract = {Discourse segmentation is the division of a text into minimal discourse segments, which form the leaves in the trees that are used to represent discourse structures. A definition of elementary discourse segments in German is provided by adapting widely used segmentation principles for English minimal units, while considering punctuation, morphology, syntax, and aspects of the logical document structure of a complex text type, namely scientific articles. The algorithm and implementation of a discourse segmenter based on these principles is presented, as well as an evaluation of test runs.}, language = {en} } @inproceedings{LuengenBaerenfaengerHilbertetal.2006, author = {L{\"u}ngen, Harald and B{\"a}renf{\"a}nger, Maja and Hilbert, Mirco and Lobin, Henning and Pusk{\'a}s, Csilla}, title = {Text parsing of a complex genre}, series = {ELPUB 2006. Digital Spectrum: Integrating Technology and Culture - Proceedings of the 10th International Conference on Electronic Publishing held in Bansko. ELPUB 2006, Bansko, Bulgaria, June 14-16}, editor = {Dobreva, Milena and Martens, Bob}, publisher = {Foi-Commerce}, address = {Sofia}, isbn = {978-954-16-0040-5}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84}, pages = {247 -- 256}, year = {2006}, abstract = {A text parsing component designed to be part of a system that assists students in academic reading and writing is presented. The parser can automatically add a relational discourse structure annotation to a scientific article that a user wants to explore. 
The discourse structure employed is defined in an XML format and is based on the Rhetorical Structure Theory. The architecture of the parser comprises pre-processing components which provide an input text with XML annotations on different linguistic and structural layers. In the first version these are syntactic tagging, lexical discourse marker tagging, logical document structure, and segmentation into elementary discourse segments. The algorithm is based on the shift-reduce parser by Marcu (2000) and is controlled by reduce operations that are constrained by linguistic conditions derived from an XML-encoded discourse marker lexicon. The constraints are formulated over multiple annotation layers of the same text.}, language = {en} } @inproceedings{LangerLuengenBayerl2004, author = {Langer, Hagen and L{\"u}ngen, Harald and Bayerl, Petra Saskia}, title = {Text type structure and logical document structure}, series = {Proceedings of the ACL-workshop on discourse annotation}, volume = {2004}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-92}, year = {2004}, abstract = {Most research on automated categorization of documents has concentrated on the assignment of one or many categories to a whole text. However, new applications, e.g. in the area of the Semantic Web, require a richer and more fine-grained annotation of documents, such as detailed thematic information about the parts of a document. Hence we investigate the automatic categorization of text segments of scientific articles with XML markup into 16 topic types from a text type structure schema. A corpus of 47 linguistic articles was provided with XML markup on different annotation layers representing text type structure, logical document structure, and grammatical categories. Six different feature extraction strategies were applied to this corpus and combined in various parametrizations in different classifiers. 
The aim was to explore the contribution of each type of information, in particular the logical structure features, to the classification accuracy. The results suggest that some of the topic types of our hierarchy are successfully learnable, while the features from the logical structure layer had no particular impact on the results.}, language = {de} } @article{LuengenSperbergMcQueen2012, author = {L{\"u}ngen, Harald and Sperberg-McQueen, Christopher M.}, title = {A TEI P5 Document Grammar for the IDS Text Model}, series = {Journal of the Text Encoding Initiative}, number = {3}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-2367}, pages = {1 -- 18}, year = {2012}, abstract = {This paper describes work in progress on I5, a TEI-based document grammar for the corpus holdings of the Institut f{\"u}r Deutsche Sprache (IDS) in Mannheim and the text model used by IDS in its work. The paper begins with background information on the nature and purposes of the corpora collected at IDS and the motivation for the I5 project (section 1). It continues with a description of the origin and history of the IDS text model (section 2), and a description (section 3) of the techniques used to automate, as far as possible, the preparation of the ODD file documenting the IDS text model. It ends with some concluding remarks (section 4). A survey of the additional features of the IDS-XCES realization of the IDS text model is given in an appendix.}, language = {en} } @inproceedings{BeisswengerLuengenMargarethaetal.2014, author = {Beißwenger, Michael and L{\"u}ngen, Harald and Margaretha, Eliza and P{\"o}litz, Christian}, title = {Mining corpora of computer-mediated communication: analysis of linguistic features in Wikipedia talk pages using machine learning methods}, series = {Proceedings of the 12th edition of the KONVENS conference Vol. 
1}, editor = {Faaß, Gertrud and Ruppenhofer, Josef}, publisher = {Universit{\"a}t Hildesheim}, address = {Hildesheim}, url = {http://nbn-resolving.de/urn:nbn:de:gbv:hil2-opus-2893}, pages = {42 -- 47}, year = {2014}, abstract = {Machine learning methods offer a great potential to automatically investigate large amounts of data in the humanities. Our contribution to the workshop reports about ongoing work in the BMBF project KobRA (http://www.kobra.tu-dortmund.de) where we apply machine learning methods to the analysis of big corpora in language-focused research of computer-mediated communication (CMC). At the workshop, we will discuss first results from training a Support Vector Machine (SVM) for the classification of selected linguistic features in talk pages of the German Wikipedia corpus in DeReKo provided by the IDS Mannheim. We will investigate different representations of the data to integrate complex syntactic and semantic information for the SVM. The results shall foster both corpus-based research of CMC and the annotation of linguistic features in CMC corpora.}, language = {en} } @incollection{KupietzBelicaLuengenetal.2014, author = {Kupietz, Marc and Belica, Cyril and L{\"u}ngen, Harald and Perkuhn, Rainer}, title = {Zwischen Empirie und {\"A}sthetik - Ans{\"a}tze zur korpuslinguistischen Untersuchung und Bewertung von Sprachwandel}, series = {Sprachverfall? Dynamik - Wandel - Variation}, editor = {Plewnia, Albrecht and Witt, Andreas}, edition = {Zweitver{\"o}ffentlichung}, publisher = {de Gruyter}, address = {Berlin/Boston}, isbn = {978-3-11-034291-8}, doi = {10.1515/9783110343007.149}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-31091}, series = {Jahrbuch / Institut f{\"u}r Deutsche Sprache}, number = {2013.}, pages = {149 -- 169}, year = {2014}, abstract = {Der Beitrag besch{\"a}ftigt sich mit der Frage, wie und inwieweit korpusbasierte Ans{\"a}tze zur Untersuchung und Bewertung von Sprachwandel beitragen k{\"o}nnen. 
Die Bewertung von Sprachwandel erscheint in dieser Hinsicht interessant, da sie erstens von gr{\"o}ßerem {\"o}ffentlichen Interesse ist, zweitens nicht zu den Kernthemen der Sprachwissenschaft z{\"a}hlt und drittens sowohl die geisteswissenschaftlichen Aspekte der Sprachwissenschaft ber{\"u}hrt als auch die empirischen, die eher f{\"u}r die so genannten harten Wissenschaften typisch sind. Letzteres trifft bei der Frage nach Sprachverfall (gutem vs. schlechtem Deutsch diachron) vermutlich unbestrittener zu als bei der Frage nach richtigem vs. falschem Deutsch, da zu ihrer Beantwortung offensichtlich einerseits empirische, messbare Kriterien herangezogen werden m{\"u}ssen, andererseits aber auch weitere Kriterien notwendig sind und es außerdem einer Entscheidung zur Einordnung und Gewichtung der verschiedenartigen Kriterien sowie einer Begr{\"u}ndung dieser Entscheidung bedarf. Zur Ann{\"a}herung an die Fragestellung werden zun{\"a}chst g{\"a}ngige, leicht operationalisierbare Hypothesen zu Symptomen eines potenziellen Verfalls des Deutschen auf verschiedenen DeReKo-basierten Korpora {\"u}berpr{\"u}ft und im Hinblick auf ihre Verallgemeinerbarkeit und Tragweite diskutiert. Im zweiten Teil werden weitere empirische Ans{\"a}tze zur Untersuchung von Wandel, Variation und Dynamik skizziert, die zur Diskussion spezieller Aspekte von Sprachverfall beitragen k{\"o}nnten. 
Im Schlussteil werden die vorgestellten Ans{\"a}tze in den Gesamtkontext einer sprachwissenschaftlichen Untersuchung von Sprachverfall gestellt und vor dem Hintergrund seines gesellschaftlichen Diskurses reflektiert.}, language = {de} } @inproceedings{KupietzLuengen2014, author = {Kupietz, Marc and L{\"u}ngen, Harald}, title = {Recent developments in DeReKo}, series = {Proceedings of the ninth conference on international language resources and evaluation (LREC'14)}, publisher = {European Language Resources Association (ELRA)}, address = {Reykjavik}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-31353}, pages = {2385}, year = {2014}, abstract = {This paper gives an overview of recent developments in the German Reference Corpus DeReKo in terms of growth, maximising relevant corpus strata, metadata, legal issues, and its current and future research interface. Due to the recent acquisition of new licenses, DeReKo has grown by a factor of four in the first half of 2014, mostly in the area of newspaper text, and presently contains over 24 billion word tokens. Other strata, like fictional texts, web corpora, in particular CMC texts, and spoken but conceptually written texts have also increased significantly. We report on the newly acquired corpora that led to the major increase, on the principles and strategies behind our corpus acquisition activities, and on our solutions for the emerging legal, organisational, and technical challenges.}, language = {en} } @incollection{PerkuhnBelicaKeibeletal.2015, author = {Perkuhn, Rainer and Belica, Cyril and Keibel, Holger and Kupietz, Marc and L{\"u}ngen, Harald}, title = {Valenz und Kookkurrenz}, series = {Valenz im Fokus. 
Grammatische und lexikografische Studien; Festschrift f{\"u}r Jacqueline Kubczak}, editor = {Dominguez V{\´a}zquez, Maria Jos{\´e} and Eichinger, Ludwig M.}, publisher = {Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, isbn = {978-3-937241-49-4}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-35442}, pages = {175 -- 196}, year = {2015}, language = {de} } @article{MargarethaLuengen2014, author = {Margaretha, Eliza and L{\"u}ngen, Harald}, title = {Building linguistic corpora from Wikipedia articles and discussions}, series = {Journal of Language Technology and Computational Linguistics. Special issue on building and annotating corpora of computer-mediated communication. Issues and challenges at the interface between computational and corpus linguistics}, volume = {29}, number = {2}, editor = {Beißwenger, Michael and Storrer, Angelika and Oostdijk, Nelleke and van den Heuvel, Henk}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-33306}, pages = {59 -- 82}, year = {2014}, abstract = {Wikipedia is a valuable resource, useful as a lingustic corpus or a dataset for many kinds of research. We built corpora from Wikipedia articles and talk pages in the I5 format, a TEI customisation used in the German Reference Corpus (Deutsches Referenzkorpus - DeReKo). Our approach is a two-stage conversion combining parsing using the Sweble parser, and transformation using XSLT stylesheets. The conversion approach is able to successfully generate rich and valid corpora regardless of languages. We also introduce a method to segment user contributions in talk pages into postings.}, language = {de} } @article{Luengen2017, author = {L{\"u}ngen, Harald}, title = {DEREKO - Das Deutsche Referenzkorpus. 
Schriftkorpora der deutschen Gegenwartssprache am Institut f{\"u}r Deutsche Sprache in Mannheim}, series = {Zeitschrift f{\"u}r germanistische Linguistik}, volume = {45}, number = {1}, publisher = {de Gruyter}, address = {Berlin/New York}, issn = {1613-0626}, doi = {10.1515/zgl-2017-0008}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-61065}, pages = {161 -- 170}, year = {2017}, language = {de} } @inproceedings{LuengenKupietz2017, author = {L{\"u}ngen, Harald and Kupietz, Marc}, title = {CMC Corpora in DeReKo}, series = {Proceedings of the Workshop on Challenges in the Management of Large Corpora and Big Data and Natural Language Processing (CMLC-5+BigNLP) 2017 including the papers from the Web-as-Corpus (WAC-XI) guest section. Birmingham, 24 July 2017}, editor = {Bański, Piotr and Kupietz, Marc and L{\"u}ngen, Harald and Rayson, Paul and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Mariani, John and Stevenson, Mark and Sick, Theresa}, publisher = {Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-62592}, pages = {20 -- 24}, year = {2017}, abstract = {We introduce three types of corpora of computer-mediated communication that have recently been compiled at the Institute for the German Language or curated from an external project and included in DeReKo, the German Reference Corpus, namely Wikipedia (discussion) corpora, the Usenet news corpus, and the Dortmund Chat Corpus. 
The data and corpora have been converted to I5, the TEI customization to represent texts in DeReKo, and are researchable via the web-based IDS corpus research interfaces and in the case of Wikipedia and chat also downloadable from the IDS repository and download server, respectively.}, language = {en} } @inproceedings{BeisswengerChanierErjavecetal.2017, author = {Beißwenger, Michael and Chanier, Thierry and Erjavec, Tomaž and Fišer, Darja and Herold, Axel and Ljubešić, Nikola and L{\"u}ngen, Harald and Poudat, C{\´e}line and Stemle, Egon W. and Storrer, Angelika and Wigham, Ciara}, title = {Closing a Gap in the Language Resources Landscape: Groundwork and Best Practices from Projects on Computer-mediated Communication in four European Countries}, series = {Selected papers from the CLARIN Annual Conference 2016, Aix-en-Provence, 26-28 October 2016, CLARIN Common Language Resources and Technology Infrastructure}, editor = {Borin, Lars}, publisher = {Link{\"o}ping University}, address = {Link{\"o}ping}, isbn = {978-91-7685-499-0}, issn = {1650-3740}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-62534}, series = {Link{\"o}ping Electronic Conference Proceedings}, number = {136}, pages = {1 -- 18}, year = {2017}, abstract = {The paper presents best practices and results from projects dedicated to the creation of corpora of computer-mediated communication and social media interactions (CMC) from four different countries. 
Even though there are still many open issues related to building and annotating corpora of this type, there already exists a range of tested solutions which may serve as a starting point for a comprehensive discussion on how future standards for CMC corpora could (and should) be shaped like.}, language = {en} } @incollection{KupietzLuengenBańskietal.2014, author = {Kupietz, Marc and L{\"u}ngen, Harald and Bański, Piotr and Belica, Cyril}, title = {Maximizing the potential of very large corpora: 50 years of big language data at IDS Mannheim}, series = {Proceedings of the LREC-2014-workshop challenges in the management of large corpora (CMLC2)}, publisher = {ELRA}, address = {Reykjavik}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-31634}, pages = {1 -- 6}, year = {2014}, abstract = {Very large corpora have been built and used at the IDS since its foundation in 1964. They have been made available on the Internet since the beginning of the 90's to currently over 30,000 researchers worldwide. The Institute provides the largest archive of written German (Deutsches Referenzkorpus, DeReKo) which has recently been extended to 24 billion words. DeReKo has been managed and analysed by engines known as COSMAS and afterwards COSMAS II, which is currently being replaced by a new, scalable analysis platform called KorAP. KorAP makes it possible to manage and analyse texts that are accompanied by multiple, potentially conflicting, grammatical and structural annotation layers, and is able to handle resources that are distributed across different, and possibly geographically distant, storage systems. The majority of texts in DeReKo are not licensed for free redistribution, hence, the COSMAS and KorAP systems offer technical solutions to facilitate research on very large corpora that are not available (and not suitable) for download. 
For the new KorAP system, it is also planned to provide sandboxed environments to support non-remote-API access "near the data" through which users can run their own analysis programs.}, language = {en} } @inproceedings{SchroeckLuengen2015, author = {Schr{\"o}ck, Jasmin and L{\"u}ngen, Harald}, title = {Building and Annotating a Corpus of German-Language Newsgroups}, series = {NLP4CMC 2015. 2nd Workshop on Natural Language Processing for Computer-Mediated Communication / Social Media. Proceedings of the Workshop , September 29, 2015 University of Duisburg-Essen, Campus Essen}, editor = {Beißwenger, Michael and Zesch, Torsten}, publisher = {German Society for Computational Linguistics \& Language Technology (GSCL)}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-43640}, pages = {17 -- 22}, year = {2015}, abstract = {Usenet is a large online resource containing user-generated messages (news articles) organised in discussion groups (newsgroups) which deal with a wide variety of different topics. We describe the download, conversion, and annotation of a comprehensive German news corpus for integration in DeReKo, the German Reference Corpus hosted at the Institut f{\"u}r Deutsche Sprache in Mannheim.}, language = {en} } @inproceedings{BeisswengerEhrhardtHorbachetal.2015, author = {Beißwenger, Michael and Ehrhardt, Eric and Horbach, Andrea and L{\"u}ngen, Harald and Steffen, Diana and Storrer, Angelika}, title = {Adding Value to CMC Corpora: CLARINification and Part-of-speech Annotation of the Dortmund Chat Corpus}, series = {NLP4CMC 2015. 2nd Workshop on Natural Language Processing for Computer-Mediated Communication / Social Media. 
Proceedings of the Workshop, September 29, 2015 University of Duisburg-Essen, Campus Essen}, editor = {Beißwenger, Michael and Zesch, Torsten}, publisher = {German Society for Computational Linguistics \& Language Technology (GSCL)}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-43654}, pages = {12 -- 16}, year = {2015}, language = {en} } @incollection{HilbertLuengenBaerenfaengeretal.2008, author = {Hilbert, Mirco and L{\"u}ngen, Harald and B{\"a}renf{\"a}nger, Maja and Lobin, Henning}, title = {Demonstration des SemDok-Textparsers}, series = {KONVENS 2008 - Erg{\"a}nzungsband. Textressourcen und lexikalisches Wissen}, editor = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander and W{\"u}rzner, Kay-Michael}, publisher = {BBAW}, address = {Berlin}, isbn = {978-3-00-025611-0}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-43032}, pages = {21 -- 27}, year = {2008}, abstract = {Im Teilprojekt CI "SemDok" der DFG-Forschergruppe Texttechnologische Informationsmodellierung wurde ein Textparser f{\"u}r Diskursstrukturen wissenschaftlicher Zeitschriftenartikel nach der Rhetorical Structure Theory entwickelt. Die wesentlichen konzeptuellen und technischen Merkmale des Chart-Parsers und die sich daraus ergebenden Parametrisierungsm{\"o}glichkeiten f{\"u}r Parsing-Experimente werden beschrieben. Zudem wird HPVtz., ein Tool f{\"u}r die Visualisierung von Parsing-Ergebnissen (RST-B{\"a}ume in einer XML-Anwendung) und die Navigation in ihnen, vorgestellt.}, language = {de} } @inproceedings{LuengenGibbon1999, author = {L{\"u}ngen, Harald and Gibbon, Dafydd}, title = {Consistent Vocabularies for Spoken Language Machine Translation Systems}, series = {GLDV '99. Multilinguale Corpora: Codierung, Strukturierung, Analyse. 11. Jahrestagung der Gesellschaft f{\"u}r Linguistische DatenVerarbeitung. 
8.-10.7.1999, Frankfurt a/M}, editor = {Gippert, Jost and Olivier, Peter}, publisher = {enigma corporation}, address = {Prag}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-74583}, pages = {169 -- 178}, year = {1999}, language = {en} } @incollection{KupietzLuengenKamockietal.2018, author = {Kupietz, Marc and L{\"u}ngen, Harald and Kamocki, Paweł and Witt, Andreas}, title = {The German reference corpus DeReKo: new developments - new opportunities}, series = {Proceedings of the eleventh international conference on language resources and evaluation (LREC 2018), 7-12 May 2018, Miyazaki, Japan}, editor = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\´e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu}, publisher = {European language resources association (ELRA)}, address = {Paris, France}, isbn = {979-10-95546-00-9}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-74917}, pages = {4354 -- 4360}, year = {2018}, abstract = {This paper discusses current trends in DeReKo, the German Reference Corpus, concerning legal issues around the recent German copyright reform with positive implications for corpus building and corpus linguistics in general, recent corpus extensions in the genres of popular magazines, journals, historical texts, and web-based football reports. 
Besides, DeReKo is finally accessible via the new corpus research platform KorAP, offering registered users several news features in comparison with its predecessor COSMAS II.}, language = {en} } @inproceedings{LuengenBeisswengerHerzbergetal.2017, author = {L{\"u}ngen, Harald and Beißwenger, Michael and Herzberg, Laura and Pichler, Cathrin}, title = {Anonymisation of the Dortmund Chat Corpus 2.1}, series = {Proceedings of the 5th Conference on CMC and Social Media Corpora for the Humanities (cmccorpora17), 3-4 October 2017, Eurac Research, Italy}, editor = {Stemle, Egon W. and Wigham, Ciara R.}, edition = {First edition}, address = {Bolzano}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-66392}, pages = {21 -- 24}, year = {2017}, abstract = {As a consequence of a recent curation project, the Dortmund Chat Corpus is available in CLARIN-D research infrastructures for download and querying. In a legal expertise it had been recommended that standard measures of anonymisation be applied to the corpus before its republication. This paper reports about the anonymisation campaign that was conducted for the corpus. Anonymisation has been realised as categorisation, and the taxonomy of anonymisation categories applied is introduced and the method of applying it to the TEI files is demonstrated. The results of the anonymisation campaign as well as issues of quality assessment are discussed. 
Finally, pseudonymisation as an alternative to categorisation as a method of the anonymisation of CMC data is discussed, as well as possibilities of an automatisation of the process.}, language = {en} } @inproceedings{BeisswengerWighamEtienneetal.2017, author = {Beißwenger, Michael and Wigham, Ciara and Etienne, Carole and Fišer, Darja and Grumt Su{\´a}rez, Holger and Herzberg, Laura and Hinrichs, Erhard and Horsmann, Tobias and Karlova-Bourbonus, Natali and Lemnitzer, Lothar and Longhi, Julien and L{\"u}ngen, Harald and Ho-Dac, Lydia-Mai and Parisse, Christophe and Poudat, C{\´e}line and Schmidt, Thomas and Stemle, Egon W. and Storrer, Angelika and Zesch, Torsten}, title = {Connecting resources: Which issues have to be solved to integrate CMC corpora from heterogeneous sources and for different languages?}, series = {Proceedings of the 5th Conference on CMC and Social Media Corpora for the Humanities (cmccorpora17). 3-4 October 2017, Eurac Research, Italy}, editor = {Stemle, Egon W. and Wigham, Ciara R.}, edition = {First edition}, address = {Bolzano}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-66401}, pages = {52 -- 55}, year = {2017}, abstract = {The paper reports on the results of a scientific colloquium dedicated to the creation of standards and best practices which are needed to facilitate the integration of language resources for CMC stemming from different origins and the linguistic analysis of CMC phenomena in different languages and genres. The key issue to be solved is that of interoperability - with respect to the structural representation of CMC genres, linguistic annotations metadata, and anonymization/pseudonymization schemas. The objective of the paper is to convince more projects to partake in a discussion about standards for CMC corpora and for the creation of a CMC corpus infrastructure across languages and genres. 
In view of the broad range of corpus projects which are currently underway all over Europe, there is a great window of opportunity for the creation of standards in a bottom-up approach.}, language = {en} } @incollection{BeisswengerLuengenSchallaboecketal.2017, author = {Beißwenger, Michael and L{\"u}ngen, Harald and Schallab{\"o}ck, Jan and Weitzmann, John H. and Herold, Axel and Kamocki, Paweł and Storrer, Angelika and Wildgans, Julia}, title = {Rechtliche Bedingungen f{\"u}r die Bereitstellung eines Chat-Korpus in CLARIN-D. Ergebnisse eines Rechtsgutachtens}, series = {Empirische Erforschung internetbasierter Kommunikation}, editor = {Beißwenger, Michael}, publisher = {De Gruyter}, address = {Berlin [u.a.]}, isbn = {978-3-11-056614-7}, doi = {10.1515/9783110567786-002}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-66494}, series = {Empirische Linguistik}, number = {9}, pages = {7 -- 46}, year = {2017}, language = {de} } @incollection{KlosaLuengen2018, author = {Klosa, Annette and L{\"u}ngen, Harald}, title = {New German words: detection and description}, series = {Proceedings of the XVIII EURALEX International Congress Lexicography in Global Contexts 17-21 July 2018, Ljubljana}, editor = {Čibej, Jaka and Gorjanc, Vojko and Kosem, Iztok and Krek, Simon}, publisher = {Znanstvena založba Filozofske fakultete Univerze v Ljubljani / Ljubljana University Press, Faculty of Arts}, address = {Ljubljana}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-77188}, pages = {559 -- 569}, year = {2018}, abstract = {In this paper, we discuss an efficient method of (semi-automatic) neologism detection for German and its application for the production of a dictionary of neologisms, focusing on the lexicographic process. 
By monitoring the language via editorial (print and online) media evaluation and interpreting the findings on the basis of lexicographic competence, many, but not all neologisms can be identified which qualify for inclusion in the Neologismenworterbuch (2006-today) at the Institute for the German Language in Mannheim (IDS). In addition, an automated corpus linguistic method offers neologism candidates based on a systematic analysis of large amounts of text to lexicographers. We explain the principles of the corpus linguistic compilation of a list of candidates and show how lexicographers work with the results, combining them with their own findings in order to continuously enlarge this specialized online dictionary of new words in German.}, language = {en} } @inproceedings{BaerenfaengerHilbertLobinetal.2007, author = {B{\"a}renf{\"a}nger, Maja and Hilbert, Mirco and Lobin, Henning and L{\"u}ngen, Harald}, title = {Using OWL ontologies in discourse parsing}, series = {Ontologies in Text Technology: Approaches to Extract Semantic Knowledge from Structured Information}, publisher = {Institut f{\"u}r Kognitionswissenschaft Universit{\"a}t Osnabr{\"u}ck}, address = {Osnabr{\"u}ck}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-77234}, pages = {1 -- 6}, year = {2007}, language = {en} } @techreport{WittLuengenGibbon1997, type = {Working Paper}, author = {Witt, Andreas and L{\"u}ngen, Harald and Gibbon, Dafydd}, title = {Standardisierung orthographischer Transkriptionen: Ein SGML/TEI-basierter Vorschlag f{\"u}r VERBMOBIL (VM-Memo 117)}, series = {Verbmobil}, publisher = {Universit{\"a}t Bielefeld}, address = {Bielefeld}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-75945}, pages = {1 -- 26}, year = {1997}, language = {de} } @inproceedings{KupietzMargarethaDiewaldetal.2019, author = {Kupietz, Marc and Margaretha, Eliza and Diewald, Nils and L{\"u}ngen, Harald and Fankhauser, Peter}, title = {What's New in EuReCo? 
Interoperability, Comparable Corpora, Licensing}, series = {Proceedings of the Workshop on Challenges in the Management of Large Corpora (CMLC-7) 2019. Cardiff, 22nd July 2019}, editor = {Bański, Piotr and Barbaresi, Adrien and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Kupietz, Marc and L{\"u}ngen, Harald and Iliadi, Caroline}, publisher = {Leibniz-Institut f{\"u}r Deutsche Sprache}, address = {Mannheim}, doi = {10.14618/ids-pub-9026}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-90261}, pages = {33 -- 39}, year = {2019}, abstract = {This paper reports on the latest developments of the European Reference Corpus EuReCo and the German Reference Corpus in relation to three of the most important CMLC topics: interoperability, collaboration on corpus infrastructure building, and legal issues. Concerning interoperability, we present new ways to access DeReKo via KorAP on the API and on the plugin level. In addition we report about advancements in the EuReCo- and ICC-initiatives with the provision of comparable corpora, and about recent problems with license acquisitions and our solution approaches using an indemnification clause and model licenses that include scientific exploitation.}, language = {en} } @inproceedings{LuengenLobin2010, author = {L{\"u}ngen, Harald and Lobin, Henning}, title = {Extracting domain knowledge from tables of contents}, series = {Digital Humanities 2010. Conference Abstracts. King's College London, London July 7 - 10, 2010}, publisher = {Office for Humanities Communication; Centre for Computing in the Humanities (King's College London}, address = {London}, isbn = {978-0-9565793-0-0}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-76096}, pages = {331}, year = {2010}, abstract = {Knowledge in textual form is always presented as visually and hierarchically structured units of text, which is particularly true in the case of academic texts. 
One research hypothesis of the ongoing project Knowledge ordering in texts - text structure and structure visualisations as sources of natural ontologies1 is that the textual structure of academic texts effectively mirrors essential parts of the knowledge structure that is built up in the text. The structuring of a modern dissertation thesis (e.g. in the form of an automatically generated table of contents - toes), for example, represents a compromise between requirements of the text type and the methodological and conceptual structure of its subject-matter. The aim of the project is to examine how visual-hierarchical structuring systems are constructed, how knowledge structures are encoded in them, and how they can be exploited to automatically derive ontological knowledge for navigation, archiving, or search tasks. The idea to extract domain concepts and semantic relations mainly from the structural and linguistic information gathered from tables of contents represents a novel approach to ontology learning.}, language = {en} } @article{BaerenfaengerHilbertLobinetal.2008, author = {B{\"a}renf{\"a}nger, Maja and Hilbert, Mirco and Lobin, Henning and L{\"u}ngen, Harald}, title = {OWL ontologies as a resource for discourse parsing}, series = {LDV-Forum - GLDV-Journal for Computational 
Linguistics and Language Technology}, volume = {23}, number = {1}, publisher = {Gesellschaft f{\"u}r Linguistische Datenverarbeitung}, address = {Bonn}, issn = {0175-1336}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-76105}, pages = {17 -- 26}, year = {2008}, abstract = {In the project SemDok (Generic document structures in linearly organised texts) funded by the German Research Foundation DFG, a discourse parser for a complex type (scientific articles by example), is being developed. Discourse parsing (henceforth DP) according to the Rhetorical Structure Theory (RST) (Mann and Taboada, 2005; Marcu, 2000) deals with automatically assigning a text a tree structure in which discourse segments and rhetorical relations between them are marked, such as Concession. For identifying the combinable segments, declarative rules are employed, which describe linguistic and structural cues and constraints about possible combinations by referring to different XML annotation layers of the input text, and external knowledge bases such as a discourse marker lexicon, a lexico-semantic ontology (later to be combined with a domain ontology), and an ontology of rhetorical relations. In our text-technological environment, the obvious choice of formalism to represent such ontologies is OWL (Smith et al., 2004). In this paper, we describe two OWL ontologies and how they are consulted from the discourse parser to solve certain tasks within DP. The first ontology is a taxonomy of rhetorical relations which was developed in the project. 
The second one is an OWL version of GermaNet, the model of which we designed together with our project partners.}, language = {en} } @inproceedings{HilbertLobinBaerenfaengeretal.2006, author = {Hilbert, Mirco and Lobin, Henning and B{\"a}renf{\"a}nger, Maja and L{\"u}ngen, Harald and Pusk{\´a}s, Csilla}, title = {A text-technological approach to automatic discourse analysis of complex texts}, series = {Proceedings of KONVENS 2006 (Konferenz zur Verarbeitung nat{\"u}rlicher Sprache)}, editor = {Butt, Miriam}, publisher = {Universit{\"a}t Konstanz}, address = {Konstanz}, isbn = {3-89318-050-8}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:352-opus-20131}, pages = {52 -- 55}, year = {2006}, abstract = {This paper describes the development of a rela­tional discourse parsing architecture for text doc­uments of a complex text type, namely scientific articles. To achieve this goal, several different linguistic knowledge sources and auxiliary ana­lyses on different linguistic levels are necessary.}, language = {en} } @incollection{BaerenfaengerLuengenHilbertetal.2010, author = {B{\"a}renf{\"a}nger, Maja and L{\"u}ngen, Harald and Hilbert, Mirco and Lobin, Henning}, title = {The role of generic and logical document structure in relational discourse analysis}, series = {Constraints in Discourse 2}, editor = {K{\"u}hnlein, Peter and Benz, Anton and Sidner, Candace L.}, publisher = {Benjamins}, address = {Amsterdam/ Philadelphia}, doi = {10.1075/pbns.194.05bar}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-77647}, series = {Pragmatics \& beyond : new series}, number = {194}, pages = {81 -- 104}, year = {2010}, abstract = {This study examines what kind of cues and constraints for discourse interpretation can be derived from the logical and generic document structure of complex texts by the example of scientific journal articles. 
We performed statistical analysis on a corpus of scientific articles annotated on different annotations layers within the framework of XML-based multi-layer annotation. We introduce different discourse segment types that constrain the textual domains in which to identify rhetorical relation spans, and we show how a canonical sequence of text type structure categories is derived from the corpus annotations. Finally, we demonstrate how and which text type structure categories assigned to complex discourse segments of the type "block" statistically constrain the occurrence of rhetorical relation types.}, language = {en} } @inproceedings{LuengenHerzberg2018, author = {L{\"u}ngen, Harald and Herzberg, Laura}, title = {Reply relations in CMC: types and annotation}, series = {Proceedings of the 6th Conference on Computer-Mediated Communication (CMC) and Social Media Corpora (CMC-corpora 2018), 17-18 September 2018, University of Antwerp}, editor = {Vandekerckhove, Reinhild and Fišer, Darja and Hilte, Lisa}, publisher = {University of Antwerp}, address = {Antwerpen}, isbn = {9789057285868}, url = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-80414}, pages = {49 -- 52}, year = {2018}, abstract = {This paper analyses reply relations in computer-mediated communication (CMC), which occur between post units in CMC interactions and which describe references between posts. We take a look at existing practices in the description and annotation of such relations in chat, wiki talk, and blog corpora. We distinguish technical reply structures, indentation structures, and interpretative reply relations, which include reply relations induced by linguistic markers. 
We sort out the different levels of description and annotation that are involved and propose a solution for their combined representation within the TEI annotation framework.}, language = {en} }
@incollection{LuengenHerzberg2018a,
  author    = {L{\"u}ngen, Harald and Herzberg, Laura},
  title     = {Reply relations in CMC: types and annotation},
  booktitle = {Proceedings of the 6th Conference on Computer-Mediated Communication (CMC) and Social Media Corpora (CMC-corpora 2018), 17-18 September 2018, University of Antwerp},
  editor    = {Vandekerckhove, Reinhild and Fi{\v{s}}er, Darja and Hilte, Lisa},
  publisher = {University of Antwerp},
  address   = {Antwerpen},
  isbn      = {9789057285868},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-81268},
  pages     = {49--52},
  year      = {2018},
  abstract  = {This paper analyses reply relations in computer-mediated communication (CMC), which occur between post units in CMC interactions and which describe references between posts. We take a look at existing practices in the description and annotation of such relations in chat, wiki talk, and blog corpora. We distinguish technical reply structures, indentation structures, and interpretative reply relations, which include reply relations induced by linguistic markers. 
We sort out the different levels of description and annotation that are involved and propose a solution for their combined representation within the TEI annotation framework.}, language = {en} }
@article{LuengenHerzberg2019,
  author    = {L{\"u}ngen, Harald and Herzberg, Laura},
  title     = {Types and annotation of reply relations in computer-mediated communication},
  journal   = {European Journal of Applied Linguistics (EuJAL)},
  volume    = {7},
  number    = {2},
  publisher = {de Gruyter},
  address   = {Berlin [u.a.]},
  issn      = {2192-9521},
  doi       = {10.1515/eujal-2019-0006},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-92645},
  pages     = {305--331},
  year      = {2019},
  abstract  = {This paper presents types and annotation layers of reply relations in computer-mediated communication (CMC). Reply relations hold between post units in CMC interactions and describe references from one given post to a previous post. We classify three types of reply relations in CMC interactions: first, technical replies, i. e. the possibility to reply directly to a previous post by clicking a 'reply' button; second, indentations, e. g. in wiki talk pages in which users insert their contributions in the existing talk page by indenting them and third, interpretative reply relations, i. e. the reply action is not realised formally but signalled by other structural or linguistics means such as address markers '@', greetings, citations and/or Q-A structures. We take a look at existing practices in the description and representation of such relations in corpora and examples of chat, Wikipedia talk pages, Twitter and blogs. We then provide an annotation proposal that combines the different levels of description and representation of reply relations and which adheres to the schemas and practices for encoding CMC corpus documents within the TEI framework as defined by the TEI CMC SIG. 
It constitutes a prerequisite for correctly identifying higher levels of interactional relations such as dialogue acts or discussion trees.}, language = {en} }
@inproceedings{BeisswengerHerzbergLuengenetal.2019,
  author    = {Bei{\ss}wenger, Michael and Herzberg, Laura and L{\"u}ngen, Harald and Wigham, Ciara R.},
  title     = {cmc-core: a basic schema for encoding CMC corpora in TEI},
  booktitle = {Proceedings of the 7th Conference on CMC and Social Media Corpora for the Humanities (CMC-Corpora2019) 9-10 September 2019. Cergy-Pontoise University, France},
  editor    = {Longhi, Julien and Marinica, Claudia},
  publisher = {Cergy-Pontoise University, France},
  address   = {Cergy-Pontoise, France},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-92668},
  pages     = {74--75},
  year      = {2019},
  abstract  = {Since 2013 representatives of several French and German CMC corpus projects have developed three customizations of the TEI-P5 standard for text encoding in order to adapt the encoding schema and models provided by the TEI to the structural peculiarities of CMC discourse. Based on the three schema versions, a 4th version has been created which takes into account the experiences from encoding our corpora and which is specifically designed for the submission of a feature request to the TEI council. 
On our poster we would present the structure of this schema and its relations (commonalities and differences) to the previous schemas.}, language = {en} }
@article{LuengenKupietz2014,
  author    = {L{\"u}ngen, Harald and Kupietz, Marc},
  title     = {Das Deutsche Referenzkorpus {DEREKO} im Jubil{\"a}umsjahr 2014},
  journal   = {Sprachreport},
  volume    = {30},
  number    = {3},
  publisher = {Institut f{\"u}r Deutsche Sprache},
  address   = {Mannheim},
  issn      = {0178-644X},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-30396},
  pages     = {24--26},
  year      = {2014},
  language  = {de}
}
@article{Luengen2012,
  author   = {L{\"u}ngen, Harald},
  title    = {{DeReKo}-Archiv jetzt mit f{\"u}nf Milliarden Textw{\"o}rtern},
  journal  = {Sprachreport : Informationen und Meinungen zur deutschen Sprache},
  volume   = {28},
  number   = {1},
  issn     = {0178-644X},
  url      = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-35328},
  pages    = {26},
  year     = {2012},
  language = {de}
}
@incollection{LuengenKupietz2020,
  author    = {L{\"u}ngen, Harald and Kupietz, Marc},
  title     = {IBK- und Social Media-Korpora am Leibniz-Institut f{\"u}r Deutsche Sprache},
  booktitle = {Deutsch in sozialen Medien: Interaktiv - multimodal - vielf{\"a}ltig},
  editor    = {Marx, Konstanze and Lobin, Henning and Schmidt, Axel},
  publisher = {de Gruyter},
  address   = {Berlin [u.a.]},
  isbn      = {978-3-11-067886-4},
  doi       = {10.1515/9783110679885-016},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-97052},
  series    = {Jahrbuch / Leibniz-Institut f{\"u}r Deutsche Sprache (IDS)},
  number    = {2019},
  pages     = {319--342},
  year      = {2020},
  abstract  = {Der Beitrag untersucht vorhandene L{\"o}sungen und neue M{\"o}glichkeiten des Korpusausbaus aus Social Media- und internetbasierter Kommunikation (IBK) f{\"u}r das Deutsche Referenzkorpus (DEREKO). DEREKO ist eine Sammlung gegenwartssprachlicher Schriftkorpora am IDS, die der sprachwissenschaftlichen {\"O}ffentlichkeit {\"u}ber die Korpusschnittstellen COSMAS II und KorAP angeboten wird. 
Anhand von Definitionen und Beispielen gehen wir zun{\"a}chst auf die Extensionen und {\"U}berlappungen der Konzepte Social Media, Internetbasierte Kommunikation und Computer-mediated Communication ein. Wir betrachten die rechtlichen Voraussetzungen f{\"u}r einen Korpusausbau aus Sozialen Medien, die sich aus dem k{\"u}rzlich in relevanten Punkten reformierten deutschen Urheberrecht, aus Pers{\"o}nlichkeitsrechten wie der europ{\"a}ischen Datenschutz-Grundverordnung ergeben und stellen Konsequenzen sowie m{\"o}gliche und tats{\"a}chliche Umsetzungen dar. Der Aufbau von Social Media-Korpora in gro{\ss}en Textmengen unterliegt au{\ss}erdem korpustechnologischen Herausforderungen, die f{\"u}r traditionelle Schriftkorpora als gel{\"o}st galten oder gar nicht erst bestanden. Wir berichten, wie Fragen der Datenaufbereitung, des Korpus-Encoding, der Anonymisierung oder der linguistischen Annotation von Social Media Korpora f{\"u}r DEREKO angegangen wurden und welche Herausforderungen noch bestehen. Wir betrachten die Korpuslandschaft verf{\"u}gbarer deutschsprachiger IBK- und Social Media-Korpora und geben einen {\"U}berblick {\"u}ber den Bestand an IBK- und Social Media-Korpora und ihre Charakteristika (Chat-, Wiki Talk- und Forenkorpora) in DEREKO sowie von laufenden Projekten in diesem Bereich. 
Anhand korpuslinguistischer Mikro- und Makro-Analysen von Wikipedia-Diskussionen im Vergleich mit dem Gesamtbestand von DEREKO zeigen wir charakterisierende sprachliche Eigenschaften von Wikipedia-Diskussionen auf und bewerten ihren Status als Repr{\"a}sentant von IBK-Korpora.}, language = {de} }
@misc{ArnoldFankhauserFissenietal.2019,
  author    = {Arnold, Denis and Fankhauser, Peter and Fisseni, Bernhard and Kupietz, Marc and L{\"u}ngen, Harald and Schmidt, Thomas and Witt, Andreas},
  title     = {Daten{\"u}bernahmerichtlinien des Leibniz-Instituts f{\"u}r Deutsche Sprache},
  edition   = {Version 24.01.2019},
  publisher = {Leibniz-Institut f{\"u}r Deutsche Sprache (IDS)},
  address   = {Mannheim},
  doi       = {10.14618/ids-pub-8791},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-87919},
  pages     = {10},
  year      = {2019},
  language  = {de}
}