% Workshop paper introducing the CorCenCC corpus of contemporary Welsh,
% published in the CMLC-5+BigNLP 2017 proceedings.
% The proceedings title is stored in `booktitle` (required for @inproceedings).
% Non-ASCII letters are written as LaTeX escapes so classic 8-bit BibTeX
% sorts and labels the names correctly.
@inproceedings{KnightFitzpatrickMorrisetal.2017,
  author    = {Knight, Dawn
               and Fitzpatrick, Tess
               and Morris, Steve
               and Evas, Jeremy
               and Rayson, Paul
               and Spasi{\'c}, Irena
               and Stonelake, Mark
               and Thomas, Enlli M{\^o}n
               and Neale, Steven
               and Needs, Jennifer
               and Piao, Scott
               and Rees, Mair
               and Watkins, Gareth
               and Anthony, Laurence
               and Cobb, Thomas Michael
               and Deuchar, Margaret
               and Donnelly, Kevin
               and McCarthy, Michael
               and Scannell, Kevin},
  title     = {Creating {CorCenCC} ({Corpws Cenedlaethol Cymraeg Cyfoes} - {The National Corpus of Contemporary Welsh})},
  booktitle = {Proceedings of the Workshop on Challenges in the Management of Large Corpora and Big Data and Natural Language Processing ({CMLC-5+BigNLP}) 2017 including the papers from the Web-as-Corpus ({WAC-XI}) guest section. Birmingham, 24 July 2017},
  editor    = {Ba{\'n}ski, Piotr
               and Kupietz, Marc
               and L{\"u}ngen, Harald
               and Rayson, Paul
               and Biber, Hanno
               and Breiteneder, Evelyn
               and Clematide, Simon
               and Mariani, John
               and Stevenson, Mark
               and Sick, Theresa},
  publisher = {Institut f{\"u}r Deutsche Sprache},
  address   = {Mannheim},
  year      = {2017},
  pages     = {13--14},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-62578},
  language  = {en},
  abstract  = {CorCenCC is an interdisciplinary and multi-institutional project that is creating a large-scale, open-source corpus of contemporary Welsh. CorCenCC will be the first ever large-scale corpus to represent spoken, written and electronically-mediated Welsh (compiling an initial data set of 10 million Welsh words), with a functional design informed, from the outset, by representatives of all anticipated academic and community user groups.},
}