% Proceedings volume of the CMLC-5+BigNLP 2017 workshop, including the WAC-XI guest section.
% All non-ASCII letters are written as brace-wrapped LaTeX accent escapes so the
% entry works with classic 8-bit BibTeX as well as with Biber.
% The abstract is a table of contents: one numbered contribution per line,
% each followed by "S. <n>" (presumably German "Seite", i.e. the start page -- confirm).
% NOTE(review): "pages" holds the single value 60, presumably the total page
% count of the volume rather than a page range -- confirm.
% NOTE(review): this is a proceedings volume; the dedicated proceedings entry
% type may fit better than book -- confirm against the bibliography style in use.
@book{OPUS4-6243,
  title     = {Proceedings of the Workshop on Challenges in the Management of Large Corpora and Big Data and Natural Language Processing ({CMLC-5+BigNLP}) 2017 including the papers from the {Web-as-Corpus} ({WAC-XI}) guest section. {Birmingham}, 24 {July} 2017},
  editor    = {Ba{\'n}ski, Piotr and Kupietz, Marc and L{\"u}ngen, Harald and Rayson, Paul and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Mariani, John and Stevenson, Mark and Sick, Theresa},
  publisher = {Institut f{\"u}r Deutsche Sprache},
  address   = {Mannheim},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-62434},
  pages     = {60},
  year      = {2017},
  abstract  = {Contents:
    1. Andreas Dittrich: Intra-connecting a small exemplary literary corpus with semantic web technologies for exploratory literary studies, S. 1
    2. John Kirk, Anna {\v{C}}erm{\'a}kov{\'a}: From ICE to ICC: The new International Comparable Corpus, S. 7
    3. Dawn Knight, Tess Fitzpatrick, Steve Morris, Jeremy Evas, Paul Rayson, Irena Spasic, Mark Stonelake, Enlli M{\^o}n Thomas, Steven Neale, Jennifer Needs, Scott Piao, Mair Rees, Gareth Watkins, Laurence Anthony, Thomas Michael Cobb, Margaret Deuchar, Kevin Donnelly, Michael McCarthy, Kevin Scannell: Creating CorCenCC (Corpws Cenedlaethol Cymraeg Cyfoes -- The National Corpus of Contemporary Welsh), S. 13
    4. Marc Kupietz, Andreas Witt, Piotr Ba{\'n}ski, Dan Tufi{\c{s}}, Dan Cristea, Tam{\'a}s V{\'a}radi: EuReCo - Joining Forces for a European Reference Corpus as a sustainable base for cross-linguistic research, S. 15
    5. Harald L{\"u}ngen, Marc Kupietz: CMC Corpora in DeReKo, S. 20
    6. David McClure, Mark Algee-Hewitt, Douris Steele, Erik Fredner, Hannah Walser: Organizing corpora at the Stanford Literary Lab, S. 25
    7. Radoslav R{\'a}bara, Pavel Rychl{\'y}, Ond{\v{r}}ej Herman: Accelerating corpus search using multiple cores, S. 30
    8. John Vidler, Stephen Wattam: Keeping Properties with the Data: CL-MetaHeaders -- An Open Specification, S. 35
    9. Vladimir Benko: Are Web Corpora Inferior? The Case of Czech and Slovak, S. 43
    10. Edyta Jurkiewicz-Rohrbacher, Zrinka Kolakovi{\'c}, Bj{\"o}rn Hansen: Web Corpora -- the best possible solution for tracking phenomena in underresourced languages: clitics in Bosnian, Croatian and Serbian, S. 49
    11. V{\'\i}t Suchomel: Removing Spam from Web Corpora Through Supervised Learning Using FastText, S. 56},
  language  = {en},
}