% Journal article in Cognitive Science 46(6), 2022. The journal name lives in
% the `journal` field (required for article entries); proper nouns in the
% title are brace-protected so sentence-casing styles keep their capitals.
@article{KoplenigKupietzWolfer2022,
  author    = {Koplenig, Alexander and Kupietz, Marc and Wolfer, Sascha},
  title     = {Testing the relationship between word length, frequency, and predictability based on the {German} {Reference} {Corpus}},
  journal   = {Cognitive Science},
  volume    = {46},
  number    = {6},
  year      = {2022},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1551-6709},
  doi       = {10.1111/cogs.13090},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-110893},
  language  = {en},
  abstract  = {In a recent article, Meylan and Griffiths (Meylan \& Griffiths, 2021, henceforth, M\&G) focus their attention on the significant methodological challenges that can arise when using large-scale linguistic corpora. To this end, M\&G revisit a well-known result of Piantadosi, Tily, and Gibson (2011, henceforth, PT\&G) who argue that average information content is a better predictor of word length than word frequency. We applaud M\&G who conducted a very important study that should be read by any researcher interested in working with large-scale corpora. The fact that M\&G mostly failed to find clear evidence in favor of PT\&G's main finding motivated us to test PT\&G's idea on a subset of the largest archive of German language texts designed for linguistic research, the German Reference Corpus consisting of {$\sim$}43 billion words. We only find very little support for the primary data point reported by PT\&G.},
}