% Workshop paper from the BioNLP 2021 proceedings (see the DOI suffix 2021.bionlp-1.19).
% Notes for maintainers:
%  - The proceedings title lives in booktitle, which this entry type requires;
%    the workshop day is kept separately in eventdate (ISO 8601) rather than
%    being embedded in the title string.
%  - year is the publication year implied by the DOI and the event date. The
%    citation key still ends in 2022 on purpose, so existing citations of this
%    key keep resolving.
%  - address is the publisher's location, not the workshop venue.
@inproceedings{MuellerGhoshWittigetal.2022,
  author    = {M{\"u}ller, Mark-Christoph and Ghosh, Sucheta and Wittig, Ulrike and Rey, Maja},
  title     = {Word-level alignment of paper documents with their electronic full-text counterparts},
  booktitle = {Proceedings of the 20th Workshop on Biomedical Language Processing},
  eventdate = {2021-06-11},
  editor    = {Demner-Fushman, Dina and Cohen, Kevin Bretonnel and Ananiadou, Sophia and Tsujii, Junichi},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, Pennsylvania},
  pages     = {168--179},
  year      = {2021},
  isbn      = {978-1-954085-40-4},
  doi       = {10.18653/v1/2021.bionlp-1.19},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-110839},
  abstract  = {We describe a simple procedure for the automatic creation of word-level alignments between printed documents and their respective full-text versions. The procedure is unsupervised, uses standard, off-the-shelf components only, and reaches an F-score of 85.01 in the basic setup and up to 86.63 when using pre- and post-processing. Potential areas of application are manual database curation (incl. document triage) and biomedical expression OCR.},
  language  = {en},
}