% Contribution to ICLC-10 (Mannheim, 18-21 July 2023), pages 274-276.
% NOTE(review): the citation key below contains the non-ASCII character "ń".
% It is kept unchanged so existing citations keep resolving; classic 8-bit
% BibTeX toolchains may need an ASCII-only key instead -- confirm per toolchain.
% Accented letters in field values use BibTeX "special character" escapes
% ({\'n}, {\"o}, ...) so that sorting and labels work under classic BibTeX.
@inproceedings{BańskiDiewaldKupietzetal.2023,
  author    = {Ba{\'n}ski, Piotr and Diewald, Nils and Kupietz, Marc and Trawi{\'n}ski, Beata},
  title     = {Applying the newly extended {European} reference corpus {EuReCo}. {Pilot} studies of light-verb constructions in {German}, {Romanian}, {Hungarian} and {Polish}},
  booktitle = {10th International Contrastive Linguistics Conference ({ICLC-10}), 18--21 July, 2023, Mannheim, Germany},
  editor    = {Trawi{\'n}ski, Beata and Kupietz, Marc and Proost, Kristel and Zinken, J{\"o}rg},
  pages     = {274--276},
  year      = {2023},
  isbn      = {978-3-937241-96-8},
  doi       = {10.14618/f8rt-m155},
  url       = {https://iclc10.ids-mannheim.de/},
  abstract  = {It is well known that the distribution of lexical and grammatical
               patterns is size- and register-sensitive (Biber 1986, and later
               publications). This fact alone presents a challenge to many
               corpus-oriented linguistic studies focusing on a single language.
               When it comes to cross-linguistic studies using corpora, the
               challenge becomes even greater due to the lack of high-quality
               multilingual corpora (Kupietz et al. 2020; Kupietz/Trawi{\'n}ski
               2022), which are comparable with respect to the size and the
               register. That was the motivation for the creation of the European
               Reference Corpus EuReCo, an initiative started in 2013 at the
               Leibniz Institute for the German Language (IDS) together with
               several European partners (Kupietz et al. 2020). EuReCo is an
               emerging federated corpus, with large virtual comparable corpora
               across various languages and with an infrastructure supporting
               contrastive research. The core of the infrastructure is KorAP
               (Diewald et al. 2016), a scalable open-source platform supporting
               the analysis and visualisation of properties of texts annotated by
               multiple and potentially conflicting information layers, and
               supporting several corpus query languages. Until recently, EuReCo
               consisted of three monolingual subparts: the German Reference
               Corpus DeReKo (Kupietz et al. 2018), the Reference Corpus of
               Contemporary Romanian Language (Barbu
               Mititelu/Tufi{\c{s}}/Irimia 2018), and the Hungarian National
               Corpus (V{\'a}radi 2002). The goal of the present submission is
               twofold. On the one hand, it reports about the new component of
               EuReCo: a sample of the National Corpus of Polish
               (Przepi{\'o}rkowski et al. 2010). On the other hand, it presents
               the results of a new pilot study using the newly extended EuReCo.
               This pilot study investigates selected Polish collocations
               involving light verbs and their prepositional / nominal
               complements (Fig. 1) and extends the collocation analyses of
               German, Romanian and Hungarian (Fig. 2) discussed in
               Kupietz/Trawi{\'n}ski (2022).},
  subject   = {Korpus},
  language  = {en},
}