@book{OPUS4-6243, title = {Proceedings of the Workshop on Challenges in the Management of Large Corpora and Big Data and Natural Language Processing (CMLC-5+BigNLP) 2017 including the papers from the Web-as-Corpus (WAC-XI) guest section. Birmingham, 24 July 2017}, editor = {Bański, Piotr and Kupietz, Marc and L{\"u}ngen, Harald and Rayson, Paul and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Mariani, John and Stevenson, Mark and Sick, Theresa}, url = {http://corpora.ids-mannheim.de/cmlc-2017.html}, pages = {60}, year = {2017}, abstract = {Contents: 1. Andreas Dittrich: Intra-connecting a small exemplary literary corpus with semantic web technologies for exploratory literary studies, S. 1 2. John Kirk, Anna Čerm{\´a}kov{\´a}: From ICE to ICC: The new International Comparable Corpus, S. 7 3. Dawn Knight, Tess Fitzpatrick, Steve Morris, Jeremy Evas, Paul Rayson, Irena Spasic, Mark Stonelake, Enlli M{\^o}n Thomas, Steven Neale, Jennifer Needs, Scott Piao, Mair Rees, Gareth Watkins, Laurence Anthony, Thomas Michael Cobb, Margaret Deuchar, Kevin Donnelly, Michael McCarthy, Kevin Scannell: Creating CorCenCC (Corpws Cenedlaethol Cymraeg Cyfoes - The National Corpus of Contemporary Welsh), S. 13 4. Marc Kupietz, Andreas Witt, Piotr Bański, Dan Tufi{\c{s}}, Dan Cristea, Tam{\´a}s V{\´a}radi: EuReCo - Joining Forces for a European Reference Corpus as a sustainable base for cross-linguistic research, S. 15 5. Harald L{\"u}ngen, Marc Kupietz: CMC Corpora in DeReKo, S. 20 6. David McClure, Mark Algee-Hewitt, Douris Steele, Erik Fredner, Hannah Walser: Organizing corpora at the Stanford Literary Lab, S. 25 7. Radoslav R{\´a}bara, Pavel Rychl{\´y} ,Ondřej Herman: Accelerating corpus search using multiple cores, S. 30 8. John Vidler, Stephen Wattam: Keeping Properties with the Data: CL-MetaHeaders - An Open Specification, S. 35 9. Vladimir Benko: Are Web Corpora Inferior? The Case of Czech and Slovak, S. 43 10. Edyta Jurkiewicz-Rohrbacher, Zrinka Kolaković, Bj{\"o}rn Hansen: Web Corpora - the best possible solution for tracking phenomena in underresourced languages: clitics in Bosnian, Croatian and Serbian, S. 49 11. V{\´i}t Suchomel: Removing Spam from Web Corpora Through Supervised Learning Using FastText, S. 56}, subject = {Korpus }, language = {en} }