% Conference paper from the LREC-2000 proceedings (as named in booktitle).
% The proceedings title lives in `booktitle`, which @inproceedings requires.
% NOTE(review): `year` was 2016, which contradicts the LREC-2000 venue and is
% presumably the repository deposit date -- confirm. The citation key is
% intentionally unchanged so existing \cite commands keep working.
@inproceedings{WittLuengenGibbon2016,
  author    = {Witt, Andreas and L{\"u}ngen, Harald and Gibbon, Dafydd},
  title     = {Enhancing speech corpus resources with multiple lexical tag layers},
  booktitle = {Proceedings of the 2nd International Conference on Language Resources and Evaluation ({LREC}-2000). Athen, Griechenland},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris},
  year      = {2000},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-45517},
  abstract  = {We describe a general two-stage procedure for re-using a custom corpus for spoken language system development involving a transformation from character-based markup to XML, and DSSSL stylesheet-driven XML markup enhancement with multiple lexical tag trees. The procedure was used to generate a fully tagged corpus; alternatively with greater economy of computing resources, it can be employed as a parametrised `tagging on demand' filter. The implementation will shortly be released as a public resource together with the corpus (German spoken dialogue, about 500k word form tokens) and lexicon (about 75k word form types).},
  language  = {en},
}