% Bibliography entries for publications by Michael Wiegand and co-authors.
%
% Conventions used in this file:
%   * booktitle holds the proceedings title of a conference paper, journal
%     holds the journal name; series is reserved for a named publication
%     series, with its number stored in volume.
%   * Accented letters are written as LaTeX escapes in BibTeX
%     "special character" form, e.g. {\"o}, {\'e}, {\u{g}}.
%   * Page ranges use a double hyphen without surrounding spaces.

@inproceedings{WiegandRothLasarcyketal.2012,
  author    = {Wiegand, Michael and Roth, Benjamin and Lasarcyk, Eva and K{\"o}ser, Stephanie and Klakow, Dietrich},
  title     = {A Gold Standard for Relation Extraction in the Food Domain},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12), May 21-27, 2012, Istanbul, Turkey},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {978-2-9517408-7-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84454},
  pages     = {507--514},
  year      = {2012},
  abstract  = {We present a gold standard for semantic relation extraction in the food domain for German. The relation types that we address are motivated by scenarios for which IT applications present a commercial potential, such as virtual customer advice in which a virtual agent assists a customer in a supermarket in finding those products that satisfy their needs best. Moreover, we focus on those relation types that can be extracted from natural language text corpora, ideally content from the internet, such as web forums, that are easy to retrieve. A typical relation type that meets these requirements are pairs of food items that are usually consumed together. Such a relation type could be used by a virtual agent to suggest additional products available in a shop that would potentially complement the items a customer has already in their shopping cart. Our gold standard comprises structural data, i.e. relation tables, which encode relation instances. These tables are vital in order to evaluate natural language processing systems that extract those relations.},
  language  = {en},
}

@inproceedings{WiegandBalahurRothetal.2010,
  author    = {Wiegand, Michael and Balahur, Alexandra and Roth, Benjamin and Klakow, Dietrich and Montoyo, Andr{\'e}s},
  title     = {A Survey on the Role of Negation in Sentiment Analysis},
  booktitle = {Proceedings of the Workshop on Negation and Speculation in Natural Language Processing ({NeSp-NLP} 2010), 10 July 2010, Uppsala, Sweden},
  editor    = {Morante, Roser and Sporleder, Caroline},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {9789057282669},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84250},
  pages     = {60--68},
  year      = {2010},
  abstract  = {This paper presents a survey on the role of negation in sentiment analysis. Negation is a very common linguistic construction that affects polarity and, therefore, needs to be taken into consideration in sentiment analysis. We will present various computational approaches modeling negation in sentiment analysis. We will, in particular, focus on aspects such as level of representation used for sentiment analysis, negation word detection and scope of negation. We will also discuss limits and challenges of negation modeling on that task.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2014,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Automatic Food Categorization from Large Unlabeled Corpora and Its Impact on Relation Extraction},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, April 26-30, 2014, Gothenburg, Sweden},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-937284-78-7},
  doi       = {10.3115/v1/E14-1071},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84696},
  pages     = {673--682},
  year      = {2014},
  abstract  = {We present a weakly-supervised induction method to assign semantic information to food items. We consider two tasks of categorizations being food-type classification and the distinction of whether a food item is composite or not. The categorizations are induced by a graph-based algorithm applied on a large unlabeled domain-specific corpus. We show that the usage of a domain-specific corpus is vital. We do not only outperform a manually designed open-domain ontology but also prove the usefulness of these categorizations in relation extraction, outperforming state-of-the-art features that include syntactic information and Brown clustering.},
  language  = {en},
}

@article{WiegandKlennerKlakow2013,
  author    = {Wiegand, Michael and Klenner, Manfred and Klakow, Dietrich},
  title     = {Bootstrapping polarity classifiers with rule-based classification},
  journal   = {Language Resources and Evaluation},
  volume    = {47},
  number    = {4},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-0218},
  doi       = {10.1007/s10579-013-9218-3},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84425},
  pages     = {1049--1088},
  year      = {2013},
  abstract  = {In this article, we examine the effectiveness of bootstrapping supervised machine-learning polarity classifiers with the help of a domain-independent rule-based classifier that relies on a lexical resource, i.e., a polarity lexicon and a set of linguistic rules. The benefit of this method is that though no labeled training data are required, it allows a classifier to capture in-domain knowledge by training a supervised classifier with in-domain features, such as bag of words, on instances labeled by a rule-based classifier. Thus, this approach can be considered as a simple and effective method for domain adaptation. Among the list of components of this approach, we investigate how important the quality of the rule-based classifier is and what features are useful for the supervised classifier. In particular, the former addresses the issue in how far linguistic modeling is relevant for this task. We not only examine how this method performs under more difficult settings in which classes are not balanced and mixed reviews are included in the data set but also compare how this linguistically-driven method relates to state-of-the-art statistical domain adaptation.},
  language  = {en},
}

@inproceedings{WiegandKlakow2010,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Bootstrapping Supervised Machine-learning Polarity Classifiers with Rule-based Classification},
  booktitle = {Proceedings of the 1st Workshop on Computational Approaches to Subjectivity and Sentiment Analysis ({WASSA}), August 17 2010, Lisbon, Portugal},
  publisher = {Universidad de Alicante},
  address   = {Alicante},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84473},
  pages     = {59--66},
  year      = {2010},
  abstract  = {In this paper, we explore the effectiveness of bootstrapping supervised machine-learning polarity classifiers using the output of domain-independent rule-based classifiers. The benefit of this method is that no labeled training data are required. Still, this method allows to capture in-domain knowledge by training the supervised classifier on in-domain features, such as bag of words. We investigate how important the quality of the rule-based classifier is and what features are useful for the supervised classifier. The former addresses the issue in how far relevant constructions for polarity classification, such as word sense disambiguation, negation modeling, or intensification, are important for this self-training approach. We not only compare how this method relates to conventional semi-supervised learning but also examine how it performs under more difficult settings in which classes are not balanced and mixed reviews are included in the dataset.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2015,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Combining Pattern-Based and Distributional Similarity for Graph-Based Noun Categorization},
  booktitle = {Natural Language Processing and Information Systems. Proceedings of the 20th International Conference on Applications of Natural Language to Information Systems, {NLDB} 2015, Passau, Germany, June 17-19, 2015},
  editor    = {Biemann, Chris and Handschuh, Siegfried and Freitas, Andr{\'e} and Meziane, Farid and M{\'e}tais, Elisabeth},
  series    = {Lecture Notes in Computer Science},
  volume    = {9103},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-319-19580-3},
  doi       = {10.1007/978-3-319-19581-0_5},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-87479},
  pages     = {64--72},
  year      = {2015},
  abstract  = {We examine the combination of pattern-based and distributional similarity for the induction of semantic categories. Pattern-based methods are precise and sparse while distributional methods have a higher recall. Given these particular properties we use the prediction of distributional methods as a back-off to pattern-based similarity. Since our pattern-based approach is embedded into a semi-supervised graph clustering algorithm, we also examine how distributional information is best added to that classifier. Our experiments are carried out on 5 different food categorization tasks.},
  language  = {en},
}

% The key of the next entry carries a "b" suffix: it previously shared the
% key WiegandKlakow2010 with the WASSA paper above, which made this entry a
% "repeated entry" that could not be cited.
@inproceedings{WiegandKlakow2010b,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Convolution Kernels for Opinion Holder Extraction},
  booktitle = {Proceedings of {HLT} '10 Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics, June 2-4, 2010, Los Angeles, California},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-932432-65-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84345},
  pages     = {795--803},
  year      = {2010},
  abstract  = {Opinion holder extraction is one of the important subtasks in sentiment analysis. The effective detection of an opinion holder depends on the consideration of various cues on various levels of representation, though they are hard to formulate explicitly as features. In this work, we propose to use convolution kernels for that task which identify meaningful fragments of sequences or trees by themselves. We not only investigate how different levels of information can be effectively combined in different kernels but also examine how the scope of these kernels should be chosen. In general relation extraction, the two candidate entities thought to be involved in a relation are commonly chosen to be the boundaries of sequences and trees. The definition of boundaries in opinion holder extraction, however, is less straightforward since there might be several expressions beside the candidate opinion holder to be eligible for being a boundary.},
  language  = {en},
}

@inproceedings{WiegandKlakow2011,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Convolution Kernels for Subjectivity Detection},
  booktitle = {Proceedings of the 18th Nordic Conference of Computational Linguistics ({NODALIDA} 2011), May 11-13, 2011, Riga, Latvia},
  editor    = {Pedersen, Bolette Sandford and Ne{\v{s}}pore, Gunta and Skadi{\c{n}}a, Inguna},
  series    = {{NEALT} Proceedings Series},
  volume    = {11},
  publisher = {Northern European Association for Language Technology},
  address   = {Uppsala},
  issn      = {1736-6305},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85032},
  pages     = {254--261},
  year      = {2011},
  abstract  = {In this paper, we explore different linguistic structures encoded as convolution kernels for the detection of subjective expressions. The advantage of convolution kernels is that complex structures can be directly provided to a classifier without deriving explicit features. The feature design for the detection of subjective expressions is fairly difficult and there currently exists no commonly accepted feature set. We consider various structures, such as constituency parse structures, dependency parse structures, and predicate-argument structures. In order to generalize from lexical information, we additionally augment these structures with clustering information and the task-specific knowledge of subjective words. The convolution kernels will be compared with a standard vector kernel.},
  language  = {en},
}

@inproceedings{WiegandLeidnerKlakow2008,
  author    = {Wiegand, Michael and Leidner, Jochen L. and Klakow, Dietrich},
  title     = {Cost-Sensitive Learning in Answer Extraction},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08), May 28-30, 2008, Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {2-9517408-4-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85373},
  pages     = {711--714},
  year      = {2008},
  abstract  = {One problem of data-driven answer extraction in open-domain factoid question answering is that the class distribution of labeled training data is fairly imbalanced. In an ordinary training set, there are far more incorrect answers than correct answers. The class-imbalance is, thus, inherent to the classification task. It has a deteriorating effect on the performance of classifiers trained by standard machine learning algorithms. They usually have a heavy bias towards the majority class, i.e. the class which occurs most often in the training set. In this paper, we propose a method to tackle class imbalance by applying some form of cost-sensitive learning which is preferable to sampling. We present a simple but effective way of estimating the misclassification costs on the basis of class distribution. This approach offers three benefits. Firstly, it maintains the distribution of the classes of the labeled training data. Secondly, this form of meta-learning can be applied to a wide range of common learning algorithms. Thirdly, this approach can be easily implemented with the help of state-of-the-art machine learning software.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2012,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Data-driven Knowledge Extraction for the Food Domain},
  booktitle = {Proceedings of the 11th Conference on Natural Language Processing ({KONVENS} 2012). Empirical Methods in Natural Language Processing, September 19-21, 2012, Vienna, Austria},
  editor    = {Jancsary, Jeremy},
  series    = {Schriftenreihe der {\"O}sterreichischen Gesellschaft f{\"u}r Artificial Intelligence ({\"O}GAI)},
  volume    = {5},
  publisher = {{\"O}sterreichische Gesellschaft f{\"u}r Artificial Intelligence},
  address   = {Wien},
  isbn      = {3-85027-005-X},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84529},
  pages     = {21--29},
  year      = {2012},
  abstract  = {In this paper, we examine methods to automatically extract domain-specific knowledge from the food domain from unlabeled natural language text. We employ different extraction methods ranging from surface patterns to co-occurrence measures applied on different parts of a document. We show that the effectiveness of a particular method depends very much on the relation type considered and that there is no single method that works equally well for every relation type. We also examine a combination of extraction methods and also consider relationships between different relation types. The extraction methods are applied both on a domain-specific corpus and the domain-independent factual knowledge base Wikipedia. Moreover, we examine an open-domain lexical ontology for suitability.},
  language  = {en},
}