% Bibliography: publications on multi-layer XML annotation and anaphora resolution.
% Conventions used below: one field per line; container titles live in
% booktitle (proceedings / collections) or journal (articles); series holds
% only the publisher's book series; page ranges use a double hyphen without spaces.

@inproceedings{BayerlLuengenGoeckeetal.2003,
  author    = {Bayerl, Petra Saskia and L{\"u}ngen, Harald and Goecke, Daniela and Witt, Andreas and Naber, Daniel},
  title     = {Methods for the semantic analysis of document markup},
  booktitle = {Proceedings of the {ACM} Symposium on Document Engineering ({DocEng} 2003)},
  editor    = {Roisin, C{\'e}cile and Munson, Ethan and Vanoirbeek, Christine},
  publisher = {ACM},
  address   = {New York},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-48014},
  pages     = {161--170},
  year      = {2003},
  abstract  = {We present an approach on how to investigate what kind of semantic information is regularly associated with the structural markup of scientific articles. This approach addresses the need for an explicit formal description of the semantics of text-oriented XML-documents. The domain of our investigation is a corpus of scientific articles from psychology and linguistics from both English and German online available journals. For our analyses, we provide XML-markup representing two kinds of semantic levels: the thematic level (i.e. topics in the text world that the article is about) and the functional or rhetorical level. Our hypothesis is that these semantic levels correlate with the articles' document structure also represented in XML. Articles have been annotated with the appropriate information. Each of the three informational levels is modelled in a separate XML document, since in our domain, the different description levels might conflict so that it is impossible to model them within a single XML document. For comparing and mining the resulting multi-layered XML annotations of one article, a Prolog-based approach is used. It focusses on the comparison of XML markup that is distributed among different documents. Prolog predicates have been defined for inferring relations between levels of information that are modelled in separate XML documents. We demonstrate how the Prolog tool is applied in our corpus analyses.},
  language  = {en}
}

@article{WittGoeckeSasakietal.2005,
  author    = {Witt, Andreas and Goecke, Daniela and Sasaki, Felix and L{\"u}ngen, Harald},
  title     = {Unification of {XML} Documents with Concurrent Markup},
  journal   = {Literary and Linguistic Computing},
  volume    = {20},
  number    = {1},
  publisher = {Oxford University Press},
  address   = {Oxford},
  issn      = {1477-4615},
  doi       = {10.1093/llc/fqh046},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45269},
  pages     = {103--116},
  year      = {2005},
  abstract  = {An approach to the unification of XML (Extensible Markup Language) documents with identical textual content and concurrent markup in the framework of XML-based multi-layer annotation is introduced. A Prolog program allows the possible relationships between element instances on two annotation layers that share PCDATA to be explored and also the computing of a target node hierarchy for a well-formed, merged XML document. Special attention is paid to identity conflicts between element instances, for which a default solution that takes into account metarelations that hold between element types on the different annotation layers is provided. In addition, rules can be specified by a user to prescribe how identity conflicts should be solved for certain element types.},
  language  = {en}
}

@inproceedings{GoeckeMetzingWitt2005,
  author    = {Goecke, Daniela and Metzing, Dieter and Witt, Andreas},
  title     = {{Verkn{\"u}pfung} heterogener texttechnologischer {Ressourcen}},
  booktitle = {INFORMATIK 2005 Informatik LIVE! Band 2. Beitr{\"a}ge der 35. Jahrestagung der Gesellschaft f{\"u}r Informatik e.V. (GI), 19. bis 22. September 2005 in Bonn P-68},
  editor    = {Cremers, Armin B. and Manthey, Rainer and Martini, Peter and Steinhage, Volker},
  publisher = {Gesellschaft f{\"u}r Informatik},
  address   = {Bonn},
  isbn      = {3-88579-397-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45315},
  pages     = {94--98},
  year      = {2005},
  abstract  = {Gegenstand des Workshop-Beitrags ist die Verkn{\"u}pfung heterogener linguistischer Ressourcen. Eine bedeutende Teilmenge von Ressourcen in der gegenw{\"a}rtigen linguistischen Forschung und Anwendung besteht zum einen aus XML-annotierten Textdokumenten und zum anderen aus externen Ressourcen wie Grammatiken, Lexika oder Ontologien. Es wird eine Architektur vorgestellt, die eine Integration heterogener Ressourcen erlaubt, wobei die Methoden zur Integration unabh{\"a}ngig von der jeweiligen Anwendung sind und somit verschiedene Verkn{\"u}pfungen erm{\"o}glichen. Eine exemplarische Anwendung der Methodologie ist die Analyse anaphorischer Beziehungen.},
  language  = {de}
}

% NOTE(review): the pages value 8 below looks like a page count rather than
% a page number or range - confirm against the publication.
@inproceedings{GoeckeLuengenSasakietal.2005,
  author    = {Goecke, Daniela and L{\"u}ngen, Harald and Sasaki, Felix and Witt, Andreas and Farrar, Scott},
  title     = {{GOLD} and Discourse: Domain- and Community-Specific Extensions},
  booktitle = {Proceedings of the {E-MELD} Workshop on Morphosyntactic Annotation and Terminology: Linguistic Ontologies and Data Categories for Language Resources},
  publisher = {E-MELD},
  address   = {Boston},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45321},
  pages     = {8},
  year      = {2005},
  language  = {en}
}

% NOTE(review): this incollection entry has no booktitle (a required field);
% the title of the volume edited by Witt and Metzing is not present in the
% exported data - look it up via the ISBN/DOI and add it.
@incollection{GoeckeLuengenMetzingetal.2010,
  author    = {Goecke, Daniela and L{\"u}ngen, Harald and Metzing, Dieter and St{\"u}hrenberg, Maik and Witt, Andreas},
  title     = {Different Views on Markup},
  editor    = {Witt, Andreas and Metzing, Dieter},
  series    = {Text, Speech and Language Technology},
  number    = {41},
  publisher = {Springer},
  address   = {Dordrecht},
  isbn      = {978-90-481-3330-7},
  doi       = {10.1007/978-90-481-3331-4_1},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-44966},
  pages     = {1--22},
  year      = {2010},
  abstract  = {In this chapter, two different ways of grouping information represented in document markup are examined: annotation levels, referring to conceptual levels of description, and annotation layers, referring to the technical realisation of markup using e.g. document grammars. In many current XML annotation projects, multiple levels are integrated into one layer, often leading to the problem of having to deal with overlapping hierarchies. As a solution, we propose a framework for XML-based multiple, independent XML annotation layers for one text, based on an abstract representation of XML documents with logical predicates. Two realisations of the abstract representation are presented, a Prolog fact base format together with an application architecture, and a specification for XML native databases. We conclude with a discussion of projects that have currently adopted this framework.},
  language  = {en}
}

% NOTE(review): the pages value 5 below looks like a page count rather than
% a page number or range - confirm against the publication.
@inproceedings{WittLuengenSasakietal.2004,
  author    = {Witt, Andreas and L{\"u}ngen, Harald and Sasaki, Felix and Goecke, Daniela},
  title     = {Unification of {XML} Documents with Concurrent Markup},
  booktitle = {ALLCACH2004, Joint Conference of the ALLC and ACH, G{\"o}teborg},
  publisher = {University of G{\"o}teborg},
  address   = {G{\"o}teborg},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45391},
  pages     = {5},
  year      = {2004},
  language  = {en}
}

@incollection{WittStuehrenbergGoeckeetal.2011,
  author    = {Witt, Andreas and St{\"u}hrenberg, Maik and Goecke, Daniela and Metzing, Dieter},
  title     = {Integrated Linguistic Annotation Models and Their Application in the Domain of Antecedent Detection},
  booktitle = {Modeling, Learning, and Processing of Text-Technological Data Structures},
  editor    = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas},
  series    = {Studies in Computational Intelligence},
  number    = {370},
  publisher = {Springer},
  address   = {Berlin/Heidelberg},
  isbn      = {978-3-642-22612-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45077},
  pages     = {197--218},
  year      = {2011},
  abstract  = {Seamless integration of various, often heterogeneous linguistic resources in terms of their output formats and a combined analysis of the respective annotation layers are crucial tasks for linguistic research. After a decade of concentration on the development of formats to structure single annotations for specific linguistic issues, in the last years a variety of specifications to store multiple annotations over the same primary data has been developed. The paper focuses on the integration of the knowledge resource logical document structure information into a text document to enhance the task of automatic anaphora resolution both for the task of candidate detection and antecedent selection. The paper investigates data structures necessary for knowledge integration and retrieval.},
  language  = {en}
}

@inproceedings{StuehrenbergWittGoeckeetal.2006,
  author    = {St{\"u}hrenberg, Maik and Witt, Andreas and Goecke, Daniela and Metzing, Dieter and Schonefeld, Oliver},
  title     = {Multidimensional markup and heterogeneous linguistic resources},
  booktitle = {Proceedings of the 5th Workshop on {NLP} and {XML}: Multi-Dimensional Markup in Natural Language Processing},
  editor    = {Ahn, David and Tjong Kim Sang, Erik and Wilcock, Graham},
  publisher = {ACL},
  address   = {Stroudsburg},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45197},
  pages     = {85--88},
  year      = {2006},
  abstract  = {The paper discusses two topics: firstly an approach of using multiple layers of annotation is sketched out. Regarding the XML representation this approach is similar to standoff annotation. A second topic is the use of heterogeneous linguistic resources (e.g., XML annotated documents, taggers, lexical nets) as a source for semiautomatic multi-dimensional markup to resolve typical linguistic issues, dealing with anaphora resolution as a case study.},
  language  = {en}
}

@inproceedings{GoeckeWitt2006,
  author    = {Goecke, Daniela and Witt, Andreas},
  title     = {Exploiting logical document structure for anaphora resolution},
  booktitle = {Proceedings of the 5th International Conference on Language Resources and Evaluation ({LREC}-2006)},
  editor    = {Hinrichs, Erhard and Ide, Nancy and Palmer, Martha and Pustejovsky, James},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-45214},
  pages     = {1077--1080},
  year      = {2006},
  abstract  = {The aim of the paper is twofold. Firstly, an approach is presented how to select the correct antecedent for an anaphoric element according to the kind of text segments in which both of them occur. Basically, information on logical text structure (e.g. chapters, sections, paragraphs) is used in order to select the antecedent life span of a linguistic expression, i.e. some linguistic expressions are more likely to be chosen as an antecedent throughout the whole text than others. In addition, an appropriate search scope for an anaphora expressed by an expression can be defined according to the document structuring elements that include the linguistic expression. Corpus investigations give rise to the supposition that logical text structure influences the search scope of candidates for antecedents. Second, a solution is presented how to integrate the resources used for anaphora resolution. In this approach, multi-layered XML annotation is used in order to make a set of resources accessible for the anaphora resolution system.},
  language  = {en}
}

@incollection{GoeckeNaberWitt2003,
  author    = {Goecke, Daniela and Naber, Daniel and Witt, Andreas},
  title     = {Query von {Multiebenen-annotierten} {XML-Dokumenten} mit {Prolog}},
  booktitle = {Sprachtechnologie f{\"u}r die multilinguale Kommunikation. Textproduktion, Recherche, {\"U}bersetzung, Lokalisierung. Beitr{\"a}ge der GLDV-Fr{\"u}hjahrstagung 2003},
  editor    = {Seewald-Heeg, Uta},
  series    = {Sprachwissenschaft, Computerlinguistik und Neue Medien},
  number    = {5},
  publisher = {Gardez! Verlag},
  address   = {Sankt Augustin},
  issn      = {0175-1336},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-64205},
  pages     = {391--405},
  year      = {2003},
  language  = {de}
}