% Edited proceedings volume of the CMLC-7 workshop (Cardiff, 22 July 2019).
% The abstract field holds the table of contents: six papers with their page
% ranges ("S." is kept from the original record; presumably German "Seite").
% NOTE(review): pages = {39} looks like the total page count of the volume
% (the last listed paper ends on p. 39), not a page range; confirm before
% moving it to a field such as biblatex "pagetotal".
@book{OPUS4-8998,
  editor    = {Ba{\'n}ski, Piotr and Barbaresi, Adrien and Biber, Hanno and
               Breiteneder, Evelyn and Clematide, Simon and Kupietz, Marc and
               L{\"u}ngen, Harald and Iliadi, Caroline},
  title     = {Proceedings of the Workshop on Challenges in the Management of
               Large Corpora ({CMLC-7}) 2019. {Cardiff}, 22 {July} 2019},
  publisher = {Leibniz-Institut f{\"u}r Deutsche Sprache},
  address   = {Mannheim},
  year      = {2019},
  pages     = {39},
  doi       = {10.14618/ids-pub-8998},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-89986},
  language  = {en},
  abstract  = {Contents:
               1. Johannes Gra{\"e}n, Tannon Kew, Anastassia Shaitarova and Martin Volk,
               ``Modelling Large Parallel Corpora'', S. 1--8
               2. Pedro Javier Ortiz Su{\'a}rez, Beno{\^\i}t Sagot and Laurent Romary,
               ``Asynchronous Pipelines for Processing Huge Corpora on Medium to Low
               Resource Infrastructures'', S. 9--16
               3. Vladim{\'\i}r Benko,
               ``Deduplication in Large Web Corpora'', S. 17--22
               4. Mark Davies,
               ``The best of both worlds: Multi-billion word ``dynamic'' corpora'', S. 23--28
               5. Adrien Barbaresi,
               ``On the need for domain-focused web corpora'', S. 29--32
               6. Marc Kupietz, Eliza Margaretha, Nils Diewald, Harald L{\"u}ngen and
               Peter Fankhauser,
               ``What's New in EuReCo? Interoperability, Comparable Corpora,
               Licensing'', S. 33--39},
}