% Conference paper by Pia Schwarz in the KONVENS 2024 proceedings.
% The conference dates are kept in the eventdate field (ISO 8601 range) rather
% than inside booktitle, so the bibliography style decides how to print them.
% The editor surname "Luz de Araujo" is braced so that the lowercase particle
% "de" is not mistaken for the start of a von part by the name parser.
@inproceedings{Schwarz2024,
  author    = {Schwarz, Pia},
  title     = {Semiautomatic data generation for academic {Named Entity Recognition} in {German} text corpora},
  booktitle = {Proceedings of the 20th Conference on Natural Language Processing ({KONVENS} 2024)},
  editor    = {{Luz de Araujo}, Pedro Henrique and Baumann, Andreas and Gromann, Dagmar and Krenn, Brigitte and Roth, Benjamin and Wiegand, Michael},
  eventdate = {2024-09-10/2024-09-13},
  pages     = {173--181},
  year      = {2024},
  url       = {https://aclanthology.org/2024.konvens-main.20},
  abstract  = {An NER model is trained to recognize three types of entities in academic contexts: person, organization, and research area. Training data is generated semiautomatically from newspaper articles with the help of word lists for the individual entity types, an off-the-shelf NE recognizer, and an LLM. Experiments fine-tuning a BERT model with different strategies of post-processing the automatically generated data result in several NER models achieving overall F1 scores of up to 92.45\%.},
  subject   = {Named Entity Recognition},
  language  = {en},
}