@book{OPUS4-11111, title = {Proceedings of the LREC 2022 Workshop on Challenges in the Management of Large Corpora (CMLC-10 2022). Marseille, 20 June 2022}, editor = {Piotr Bański and Adrien Barbaresi and Simon Clematide and Marc Kupietz and Harald L{\"u}ngen}, publisher = {European Language Resources Association (ELRA)}, address = {Paris}, isbn = {979-10-95546-83-2}, url = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-111115}, pages = {viii; 36}, year = {2022}, abstract = {Contents: 1. Vasile Pais, Maria Mitrofan, Verginica Barbu Mititelu, Elena Irimia, Roxana Micu and Carol Luca Gasan: Challenges in Creating a Representative Corpus of Romanian Micro-Blogging Text. Pp. 1-7 2. Modest von Korff: Exhaustive Indexing of PubMed Records with Medical Subject Headings. Pp. 8-15 3. Luca Brigada Villa: UDeasy: a Tool for Querying Treebanks in CoNLL-U Format. Pp. 16-19 4. Nils Diewald: Matrix and Double-Array Representations for Efficient Finite State Tokenization. Pp. 20-26 5. Peter Fankhauser and Marc Kupietz: Count-Based and Predictive Language Models for Exploring DeReKo. Pp. 27-31 6. Hanno Biber: “The word expired when that world awoke.” New Challenges for Research with Large Text Corpora and Corpus-Based Discourse Studies in Totalitarian Times. Pp. 32-35}, language = {en} }