% Evert and Hardie (2015): workshop paper introducing the Ziggurat data model for CWB.
% The proceedings title is stored in booktitle, the field required for this entry type;
% non-ASCII characters are written as LaTeX escapes so classic 8-bit BibTeX handles them.
@inproceedings{EvertHardie2015,
  author    = {Evert, Stefan and Hardie, Andrew},
  title     = {{Ziggurat}: A new data model and indexing format for large annotated text corpora},
  booktitle = {Proceedings of the 3rd Workshop on Challenges in the Management of Large Corpora ({CMLC-3}), Lancaster, 20 July 2015},
  editor    = {Ba{\'n}ski, Piotr and Biber, Hanno and Breiteneder, Evelyn and Kupietz, Marc and L{\"u}ngen, Harald and Witt, Andreas},
  pages     = {21--27},
  publisher = {Institut f{\"u}r Deutsche Sprache},
  address   = {Mannheim},
  year      = {2015},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-38335},
  language  = {en},
  abstract  = {The IMS Open Corpus Workbench (CWB) software currently uses a simple tabular data model with proven limitations. We outline and justify the need for a new data model to underlie the next major version of CWB. This data model, dubbed Ziggurat, defines a series of types of data layer to represent different structures and relations within an annotated corpus; each such layer may contain variables of different types. Ziggurat will allow us to gradually extend and enhance CWB's existing CQP-syntax for corpus queries, and also make possible more radical departures relative not only to the current version of CWB but also to other contemporary corpus-analysis software.},
}