% LREC 2002 paper by Mueller and Strube (MMAX discourse-level corpus API).
% The citation key is kept unchanged (it still ends in 2022) so existing
% citations keep resolving; the year field now matches the conference
% date given in the proceedings title.
@inproceedings{MuellerStrube2022,
  author    = {M{\"u}ller, Mark-Christoph and Strube, Michael},
  title     = {An {API} for discourse-level access to {XML}-encoded corpora},
  booktitle = {Proceedings of the Third International Conference on Language Resources and Evaluation ({LREC}'02). May 29--31, 2002, Las Palmas, Canary Islands, Spain},
  editor    = {Gonz{\'a}lez Rodr{\'\i}guez, Manuel and Suarez Araujo, Carmen Paz},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris},
  pages     = {26--30},
  year      = {2002},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-111602},
  abstract  = {We describe a simple and efficient Java object model and application programming interface (API) for (possibly multi-modal) annotated natural language corpora. Corpora are represented as elements like Sentences, Turns, Utterances, Words, Gestures and Markables. The API allows linguists to access corpora in terms of these discourse-level elements, i.e. at a conceptual level they are familiar with, with the flexibility offered by a general purpose programming language. It is also a contribution to corpus standardization efforts because it is based on a straightforward and easily extensible data model which can serve as a target for conversion of different corpus formats.},
  language  = {en},
}