% Conference paper by Witt, Luengen and Gibbon from the LREC-2000 proceedings.
%
% NOTE(review): the citation key embeds a LaTeX accent group (braces, backslash,
% double quote). Classic BibTeX is likely to mis-parse such a key; consider
% renaming it to an ASCII-only key together with every document that cites it.
% It is left unchanged here because the citing documents are not visible.
%
% NOTE(review): year was 2016, which conflicts with the venue named in booktitle
% and url (LREC-2000); it is set to 2000 below. The old value presumably was a
% repository record date -- confirm. The key still carries the 2016 suffix.
%
% NOTE(review): pages holds the single value 5; this may be a page count rather
% than a page number or range -- confirm against the proceedings.
@inproceedings{WittL{\"u}ngenGibbon2016,
  author    = {Witt, Andreas and L{\"u}ngen, Harald and Gibbon, Dafydd},
  title     = {Enhancing speech corpus resources with multiple lexical tag layers},
  booktitle = {Proceedings of the 2nd International Conference on Language Resources and Evaluation ({LREC}-2000), Athens, Greece},
  year      = {2000},
  pages     = {5},
  url       = {http://lrec-conf.org/proceedings/lrec2000/},
  abstract  = {We describe a general two-stage procedure for re-using a custom corpus for spoken language system development involving a transformation from character-based markup to XML, and DSSSL stylesheet-driven XML markup enhancement with multiple lexical tag trees. The procedure was used to generate a fully tagged corpus; alternatively with greater economy of computing resources, it can be employed as a parametrised `tagging on demand' filter. The implementation will shortly be released as a public resource together with the corpus (German spoken dialogue, about 500k word form tokens) and lexicon (about 75k word form types).},
  language  = {en},
}