@comment{
  Bibliography of publications by Michael Wiegand and co-authors.

  Conventions used in this file:
  - One field per line; values are brace-delimited.
  - For inproceedings entries, booktitle holds the proceedings title, while
    series and number hold the enclosing book series and its number.
  - For article entries, journal holds the journal name.
  - Non-ASCII letters are written as LaTeX accent escapes so the file works
    with classic BibTeX as well as Biber.
  - Page ranges use a double hyphen without surrounding spaces.

  Citation keys: several entries originally shared one key. The first entry
  with a given key keeps it unchanged; later entries carry a b or c suffix:
    WiegandKlakow2009b      The Role of Knowledge-based Features ...
    WiegandKlakow2010b      Convolution Kernels for Opinion Holder Extraction
    WiegandKlakow2010c      Predictive Features for Detecting Indefinite Polar Sentences
    WiegandKlakow2011b      Prototypical Opinion Holders ...
    WiegandKlakow2011c      The Role of Predicates in Opinion Holder Extraction
    WiegandKlakow2013b      Towards the Detection of Reliable Food-Health Relationships
    WiegandRothKlakow2012b  Knowledge Acquisition with Natural Language Processing ...
    WiegandRothKlakow2012c  Web-Based Relation Extraction for the Food Domain
}

@inproceedings{WiegandRothLasarcyketal.2012,
  author    = {Wiegand, Michael and Roth, Benjamin and Lasarcyk, Eva and K{\"o}ser, Stephanie and Klakow, Dietrich},
  title     = {A Gold Standard for Relation Extraction in the Food Domain},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), May 21-27, 2012, Istanbul, Turkey},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and U{\u{g}}ur Do{\u{g}}an, Mehmet and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {978-2-9517408-7-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84454},
  pages     = {507--514},
  year      = {2012},
  abstract  = {We present a gold standard for semantic relation extraction in the food domain for German. The relation types that we address are motivated by scenarios for which IT applications present a commercial potential, such as virtual customer advice in which a virtual agent assists a customer in a supermarket in finding those products that satisfy their needs best. Moreover, we focus on those relation types that can be extracted from natural language text corpora, ideally content from the internet, such as web forums, that are easy to retrieve. A typical relation type that meets these requirements are pairs of food items that are usually consumed together. Such a relation type could be used by a virtual agent to suggest additional products available in a shop that would potentially complement the items a customer has already in their shopping cart. Our gold standard comprises structural data, i.e. relation tables, which encode relation instances. These tables are vital in order to evaluate natural language processing systems that extract those relations.},
  language  = {en},
}

@inproceedings{WiegandBalahurRothetal.2010,
  author    = {Wiegand, Michael and Balahur, Alexandra and Roth, Benjamin and Klakow, Dietrich and Montoyo, Andr{\'e}s},
  title     = {A Survey on the Role of Negation in Sentiment Analysis},
  booktitle = {Proceedings of the Workshop on Negation and Speculation in Natural Language Processing (NeSp-NLP 2010), 10 July 2010, Uppsala, Sweden},
  editor    = {Morante, Roser and Sporleder, Caroline},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {9789057282669},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84250},
  pages     = {60--68},
  year      = {2010},
  abstract  = {This paper presents a survey on the role of negation in sentiment analysis. Negation is a very common linguistic construction that affects polarity and, therefore, needs to be taken into consideration in sentiment analysis. We will present various computational approaches modeling negation in sentiment analysis. We will, in particular, focus on aspects such as level of representation used for sentiment analysis, negation word detection and scope of negation. We will also discuss limits and challenges of negation modeling on that task.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2014,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Automatic Food Categorization from Large Unlabeled Corpora and Its Impact on Relation Extraction},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, April 26-30, 2014, Gothenburg, Sweden},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-937284-78-7},
  doi       = {10.3115/v1/E14-1071},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84696},
  pages     = {673--682},
  year      = {2014},
  abstract  = {We present a weakly-supervised induction method to assign semantic information to food items. We consider two tasks of categorizations being food-type classification and the distinction of whether a food item is composite or not. The categorizations are induced by a graph-based algorithm applied on a large unlabeled domain-specific corpus. We show that the usage of a domain-specific corpus is vital. We do not only outperform a manually designed open-domain ontology but also prove the usefulness of these categorizations in relation extraction, outperforming state-of-the-art features that include syntactic information and Brown clustering.},
  language  = {en},
}

@article{WiegandKlennerKlakow2013,
  author    = {Wiegand, Michael and Klenner, Manfred and Klakow, Dietrich},
  title     = {Bootstrapping polarity classifiers with rule-based classification},
  journal   = {Language Resources and Evaluation},
  volume    = {47},
  number    = {4},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-0218},
  doi       = {10.1007/s10579-013-9218-3},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84425},
  pages     = {1049--1088},
  year      = {2013},
  abstract  = {In this article, we examine the effectiveness of bootstrapping supervised machine-learning polarity classifiers with the help of a domain-independent rule-based classifier that relies on a lexical resource, i.e., a polarity lexicon and a set of linguistic rules. The benefit of this method is that though no labeled training data are required, it allows a classifier to capture in-domain knowledge by training a supervised classifier with in-domain features, such as bag of words, on instances labeled by a rule-based classifier. Thus, this approach can be considered as a simple and effective method for domain adaptation. Among the list of components of this approach, we investigate how important the quality of the rule-based classifier is and what features are useful for the supervised classifier. In particular, the former addresses the issue in how far linguistic modeling is relevant for this task. We not only examine how this method performs under more difficult settings in which classes are not balanced and mixed reviews are included in the data set but also compare how this linguistically-driven method relates to state-of-the-art statistical domain adaptation.},
  language  = {en},
}

@inproceedings{WiegandKlakow2010,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Bootstrapping Supervised Machine-learning Polarity Classifiers with Rule-based Classification},
  booktitle = {Proceedings of the 1st Workshop on Computational Approaches to Subjectivity and Sentiment Analysis (WASSA), August 17, 2010, Lisbon, Portugal},
  publisher = {Universidad de Alicante},
  address   = {Alicante},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84473},
  pages     = {59--66},
  year      = {2010},
  abstract  = {In this paper, we explore the effectiveness of bootstrapping supervised machine-learning polarity classifiers using the output of domain-independent rule-based classifiers. The benefit of this method is that no labeled training data are required. Still, this method allows to capture in-domain knowledge by training the supervised classifier on in-domain features, such as bag of words. We investigate how important the quality of the rule-based classifier is and what features are useful for the supervised classifier. The former addresses the issue in how far relevant constructions for polarity classification, such as word sense disambiguation, negation modeling, or intensification, are important for this self-training approach. We not only compare how this method relates to conventional semi-supervised learning but also examine how it performs under more difficult settings in which classes are not balanced and mixed reviews are included in the dataset.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2015,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Combining Pattern-Based and Distributional Similarity for Graph-Based Noun Categorization},
  booktitle = {Natural Language Processing and Information Systems. Proceedings of the 20th International Conference on Applications of Natural Language to Information Systems, NLDB 2015, Passau, Germany, June 17-19, 2015},
  editor    = {Biemann, Chris and Handschuh, Siegfried and Freitas, Andr{\'e} and Meziane, Farid and M{\'e}tais, Elisabeth},
  series    = {Lecture Notes in Computer Science},
  number    = {9103},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-319-19580-3},
  doi       = {10.1007/978-3-319-19581-0_5},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-87479},
  pages     = {64--72},
  year      = {2015},
  abstract  = {We examine the combination of pattern-based and distributional similarity for the induction of semantic categories. Pattern-based methods are precise and sparse while distributional methods have a higher recall. Given these particular properties we use the prediction of distributional methods as a back-off to pattern-based similarity. Since our pattern-based approach is embedded into a semi-supervised graph clustering algorithm, we also examine how distributional information is best added to that classifier. Our experiments are carried out on 5 different food categorization tasks.},
  language  = {en},
}

@inproceedings{WiegandKlakow2010b,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Convolution Kernels for Opinion Holder Extraction},
  booktitle = {Proceedings of HLT '10 Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics, June 2-4, 2010, Los Angeles, California},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-932432-65-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84345},
  pages     = {795--803},
  year      = {2010},
  abstract  = {Opinion holder extraction is one of the important subtasks in sentiment analysis. The effective detection of an opinion holder depends on the consideration of various cues on various levels of representation, though they are hard to formulate explicitly as features. In this work, we propose to use convolution kernels for that task which identify meaningful fragments of sequences or trees by themselves. We not only investigate how different levels of information can be effectively combined in different kernels but also examine how the scope of these kernels should be chosen. In general relation extraction, the two candidate entities thought to be involved in a relation are commonly chosen to be the boundaries of sequences and trees. The definition of boundaries in opinion holder extraction, however, is less straightforward since there might be several expressions beside the candidate opinion holder to be eligible for being a boundary.},
  language  = {en},
}

@inproceedings{WiegandKlakow2011,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Convolution Kernels for Subjectivity Detection},
  booktitle = {Proceedings of the 18th Nordic Conference of Computational Linguistics (NODALIDA 2011), May 11-13, 2011, Riga, Latvia},
  editor    = {Sandford Pedersen, Bolette and Ne{\v{s}}pore, Gunta and Skadi{\c{n}}a, Inguna},
  series    = {NEALT Proceedings Series},
  number    = {11},
  publisher = {Northern European Association for Language Technology},
  address   = {Uppsala},
  issn      = {1736-6305},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85032},
  pages     = {254--261},
  year      = {2011},
  abstract  = {In this paper, we explore different linguistic structures encoded as convolution kernels for the detection of subjective expressions. The advantage of convolution kernels is that complex structures can be directly provided to a classifier without deriving explicit features. The feature design for the detection of subjective expressions is fairly difficult and there currently exists no commonly accepted feature set. We consider various structures, such as constituency parse structures, dependency parse structures, and predicate-argument structures. In order to generalize from lexical information, we additionally augment these structures with clustering information and the task-specific knowledge of subjective words. The convolution kernels will be compared with a standard vector kernel.},
  language  = {en},
}

@inproceedings{WiegandLeidnerKlakow2008,
  author    = {Wiegand, Michael and Leidner, Jochen L. and Klakow, Dietrich},
  title     = {Cost-Sensitive Learning in Answer Extraction},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), May 28-30, 2008, Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {2-9517408-4-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85373},
  pages     = {711--714},
  year      = {2008},
  abstract  = {One problem of data-driven answer extraction in open-domain factoid question answering is that the class distribution of labeled training data is fairly imbalanced. In an ordinary training set, there are far more incorrect answers than correct answers. The class-imbalance is, thus, inherent to the classification task. It has a deteriorating effect on the performance of classifiers trained by standard machine learning algorithms. They usually have a heavy bias towards the majority class, i.e. the class which occurs most often in the training set. In this paper, we propose a method to tackle class imbalance by applying some form of cost-sensitive learning which is preferable to sampling. We present a simple but effective way of estimating the misclassification costs on the basis of class distribution. This approach offers three benefits. Firstly, it maintains the distribution of the classes of the labeled training data. Secondly, this form of meta-learning can be applied to a wide range of common learning algorithms. Thirdly, this approach can be easily implemented with the help of state-of-the-art machine learning software.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2012,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Data-driven Knowledge Extraction for the Food Domain},
  booktitle = {Proceedings of the 11th Conference on Natural Language Processing (KONVENS 2012). Empirical Methods in Natural Language Processing, September 19-21, 2012, Vienna, Austria},
  editor    = {Jancsary, Jeremy},
  series    = {Schriftenreihe der {\"O}sterreichischen Gesellschaft f{\"u}r Artificial Intelligence ({\"O}GAI)},
  number    = {5},
  publisher = {{\"O}sterreichische Gesellschaft f{\"u}r Artificial Intelligence},
  address   = {Wien},
  isbn      = {3-85027-005-X},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84529},
  pages     = {21--29},
  year      = {2012},
  abstract  = {In this paper, we examine methods to automatically extract domain-specific knowledge from the food domain from unlabeled natural language text. We employ different extraction methods ranging from surface patterns to co-occurrence measures applied on different parts of a document. We show that the effectiveness of a particular method depends very much on the relation type considered and that there is no single method that works equally well for every relation type. We also examine a combination of extraction methods and also consider relationships between different relation types. The extraction methods are applied both on a domain-specific corpus and the domain-independent factual knowledge base Wikipedia. Moreover, we examine an open-domain lexical ontology for suitability.},
  language  = {en},
}

@article{WiegandKlakow2015,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Detecting conditional healthiness of food items from natural language text},
  journal   = {Language Resources and Evaluation},
  volume    = {49},
  number    = {4},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-0218},
  doi       = {10.1007/s10579-015-9314-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85428},
  pages     = {777--830},
  year      = {2015},
  abstract  = {In this article, we explore the feasibility of extracting suitable and unsuitable food items for particular health conditions from natural language text. We refer to this task as conditional healthiness classification. For that purpose, we annotate a corpus extracted from forum entries of a food-related website. We identify different relation types that hold between food items and health conditions going beyond a binary distinction of suitability and unsuitability and devise various supervised classifiers using different types of features. We examine the impact of different task-specific resources, such as a healthiness lexicon that lists the healthiness status of a food item and a sentiment lexicon. Moreover, we also consider task-specific linguistic features that disambiguate a context in which mentions of a food item and a health condition co-occur and compare them with standard features using bag of words, part-of-speech information and syntactic parses. We also investigate in how far individual food items and health conditions correlate with specific relation types and try to harness this information for classification.},
  language  = {en},
}

@inproceedings{WiegandKlakow2012,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Generalization Methods for In-Domain and Cross-Domain Opinion Holder Extraction},
  booktitle = {Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics, April 23-27, 2012, Avignon, France},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-937284-19-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84378},
  pages     = {325--335},
  year      = {2012},
  abstract  = {In this paper, we compare three different generalization methods for in-domain and cross-domain opinion holder extraction being simple unsupervised word clustering, an induction method inspired by distant supervision and the usage of lexical resources. The generalization methods are incorporated into diverse classifiers. We show that generalization causes significant improvements and that the impact of improvement depends on the type of classifier and on how much training and test data differ from each other. We also address the less common case of opinion holders being realized in patient position and suggest approaches including a novel (linguistically-informed) extraction method how to detect those opinion holders without labeled training data as standard datasets contain too few instances of this type.},
  language  = {en},
}

@inproceedings{WiegandRothKlakow2012b,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Knowledge Acquisition with Natural Language Processing in the Food Domain: Potential and Challenges},
  booktitle = {Proceedings of the Cooking with Computers workshop (CwC), August 28, 2012, Montpellier, France},
  editor    = {Cordier, Am{\'e}lie and Nauer, Emmanuel},
  publisher = {LIRMM},
  address   = {Montpellier},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-86207},
  pages     = {46--51},
  year      = {2012},
  abstract  = {In this paper, we present an outlook on the effectiveness of natural language processing (NLP) in extracting knowledge for the food domain. We identify potential scenarios that we think are particularly suitable for NLP techniques. As a source for extracting knowledge we will highlight the benefits of textual content from social media. Typical methods that we think would be suitable will be discussed. We will also address potential problems and limits that the application of NLP methods may yield.},
  language  = {en},
}

@inproceedings{DembowskiWiegandKlakow2017,
  author    = {Dembowski, Julia and Wiegand, Michael and Klakow, Dietrich},
  title     = {Language Independent Named Entity Recognition using Distant Supervision},
  booktitle = {Human Language Technologies as a Challenge for Computer Science and Linguistics. Proceedings of the 8th Language \& Technology Conference, November 17-19, 2017, Pozna{\'n}, Poland},
  editor    = {Vetulani, Zygmunt and Paroubek, Patrick},
  publisher = {Fundacja Uniwersytetu im. Adama Mickiewicza},
  address   = {Pozna{\'n}},
  isbn      = {978-83-64864-94-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-86198},
  pages     = {68--72},
  year      = {2017},
  abstract  = {While good results have been achieved for named entity recognition (NER) in supervised settings, it remains a problem that for low resource languages and less studied domains little or no labelled data is available. As NER is a crucial preprocessing step for many natural language processing tasks, finding a way to overcome this deficit in data remains of great interest. We propose a distant supervision approach to NER that is both language and domain independent where we automatically generate labelled training data using gazetteers that we previously extracted from Wikipedia. We test our approach on English, German and Estonian data sets and contribute further by introducing several successful methods to reduce the noise in the generated training data. The tested models beat baseline systems and our results show that distant supervision can be a promising approach for NER when no labelled data is available. For the English model we also show that the distant supervision model is better at generalizing within the same domain of news texts by comparing it against a supervised model on a different test set.},
  language  = {en},
}

@inproceedings{WiegandRuppenhoferKlakow2013,
  author    = {Wiegand, Michael and Ruppenhofer, Josef and Klakow, Dietrich},
  title     = {Predicative Adjectives: An Unsupervised Criterion to Extract Subjective Adjectives},
  booktitle = {Proceedings of HLT-NAACL 2013},
  publisher = {Association for Computational Linguistics},
  address   = {Atlanta},
  isbn      = {978-1-937284-47-3},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-52333},
  pages     = {534--539},
  year      = {2013},
  abstract  = {We examine predicative adjectives as an unsupervised criterion to extract subjective adjectives. We do not only compare this criterion with a weakly supervised extraction method but also with gradable adjectives, i.e. another highly subjective subset of adjectives that can be extracted in an unsupervised fashion. In order to prove the robustness of this extraction method, we will evaluate the extraction with the help of two different state-of-the-art sentiment lexicons (as a gold standard).},
  language  = {en},
}

@inproceedings{WiegandKlakow2010c,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Predictive Features for Detecting Indefinite Polar Sentences},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), May 17-23, 2010, Valletta, Malta},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {2-9517408-6-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85052},
  pages     = {3092--3096},
  year      = {2010},
  abstract  = {In recent years, text classification in sentiment analysis has mostly focused on two types of classification, the distinction between objective and subjective text, i.e. subjectivity detection, and the distinction between positive and negative subjective text, i.e. polarity classification. So far, there has been little work examining the distinction between definite polar subjectivity and indefinite polar subjectivity. While the former are utterances which can be categorized as either positive or negative, the latter cannot be categorized as either of these two categories. This paper presents a small set of domain independent features to detect indefinite polar sentences. The features reflect the linguistic structure underlying these types of utterances. We give evidence for the effectiveness of these features by incorporating them into an unsupervised rule-based classifier for sentence-level analysis and compare its performance with supervised machine learning classifiers, i.e. Support Vector Machines (SVMs) and Nearest Neighbor Classifier (kNN). The data used for the experiments are web-reviews collected from three different domains.},
  language  = {en},
}

@inproceedings{WiegandKlakow2009,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Predictive Features in Semi-Supervised Learning for Polarity Classification and the Role of Adjectives},
  booktitle = {Proceedings of the 17th Nordic Conference of Computational Linguistics (NODALIDA 2009), May 14-16, 2009, Odense, Denmark},
  editor    = {Jokinen, Kristiina and Bick, Eckhard},
  series    = {NEALT Proceedings Series},
  number    = {4},
  publisher = {Northern European Association for Language Technology},
  address   = {Uppsala},
  issn      = {1736-6305},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84588},
  pages     = {198--205},
  year      = {2009},
  abstract  = {In opinion mining, there has been only very little work investigating semi-supervised machine learning on document-level polarity classification. We show that semi-supervised learning performs significantly better than supervised learning when only few labelled data are available. Semi-supervised polarity classifiers rely on a predictive feature set. (Semi-)Manually built polarity lexicons are one option but they are expensive to obtain and do not necessarily work in an unknown domain. We show that extracting frequently occurring adjectives \& adverbs of an unlabeled set of in-domain documents is an inexpensive alternative which works equally well throughout different domains.},
  language  = {en},
}

@inproceedings{WiegandKlakow2011b,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Prototypical Opinion Holders: What We can Learn from Experts and Analysts},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing 2011, Hissar, Bulgaria, 12-14 September, 2011},
  editor    = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolov, Nikolai},
  publisher = {Incoma Ltd.},
  address   = {Shoumen},
  issn      = {1313-8502},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84674},
  pages     = {282--288},
  year      = {2011},
  abstract  = {In order to automatically extract opinion holders, we propose to harness the contexts of prototypical opinion holders, i.e. common nouns, such as experts or analysts, that describe particular groups of people whose profession or occupation is to form and express opinions towards specific items. We assess their effectiveness in supervised learning where these contexts are regarded as labelled training data and in rule-based classification which uses predicates that frequently co-occur with mentions of the prototypical opinion holders. Finally, we also examine in how far knowledge gained from these contexts can compensate the lack of large amounts of labeled training data in supervised learning by considering various amounts of actually labeled training sets.},
  language  = {en},
}

@inproceedings{ReiplingerWiegandKlakow2014,
  author    = {Reiplinger, Melanie and Wiegand, Michael and Klakow, Dietrich},
  title     = {Relation Extraction for the Food Domain without Labeled Training Data - Is Distant Supervision the Best Solution?},
  booktitle = {Advances in Natural Language Processing. Proceedings of the 9th International Conference on NLP, PolTAL 2014, Warsaw, Poland, September 17-19, 2014},
  editor    = {Przepi{\'o}rkowski, Adam and Ogrodniczuk, Maciej},
  series    = {Lecture Notes in Artificial Intelligence},
  number    = {8686},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-319-10887-2},
  doi       = {10.1007/978-3-319-10888-9_35},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-87465},
  pages     = {345--357},
  year      = {2014},
  abstract  = {We examine the task of relation extraction in the food domain by employing distant supervision. We focus on the extraction of two relations that are not only relevant to product recommendation in the food domain, but that also have significance in other domains, such as the fashion or electronics domain. In order to select suitable training data, we investigate various degrees of freedom. We consider three processing levels being argument level, sentence level and feature level. As external resources, we employ manually created surface patterns and semantic types on all these levels. We also explore in how far rule-based methods employing the same information are competitive.},
  language  = {en},
}

@inproceedings{WiegandKlakow2014,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Separating Brands from Types: an Investigation of Different Features for the Food Domain},
  booktitle = {Proceedings of COLING 2014, the 25th International Conference on Computational Linguistics, August 23-29, 2014, Dublin, Ireland: Technical Papers},
  publisher = {Dublin City University},
  address   = {Dublin},
  isbn      = {978-1-941643-26-6},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84874},
  pages     = {2291--2302},
  year      = {2014},
  abstract  = {We examine the task of separating types from brands in the food domain. Framing the problem as a ranking task, we convert simple textual features extracted from a domain-specific corpus into a ranker without the need of labeled training data. Such method should rank brands (e.g. sprite) higher than types (e.g. lemonade). Apart from that, we also exploit knowledge induced by semi-supervised graph-based clustering for two different purposes. On the one hand, we produce an auxiliary categorization of food items according to the Food Guide Pyramid, and assume that a food item is a type when it belongs to a category unlikely to contain brands. On the other hand, we directly model the task of brand detection using seeds provided by the output of the textual ranking features. We also harness Wikipedia articles as an additional knowledge source.},
  language  = {en},
}

@inproceedings{WiegandKlakow2009b,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {The Role of Knowledge-based Features in Polarity Classification at Sentence Level},
  booktitle = {Proceedings of the Twenty-Second International Florida Artificial Intelligence Research Society Conference, 19-21 May 2009, Sanibel Island, Florida, USA},
  editor    = {Lane, H. Chad and Guesgen, Hans W.},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA},
  isbn      = {978-1-57735-419-2},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84390},
  pages     = {296--301},
  year      = {2009},
  abstract  = {Though polarity classification has been extensively explored at document level, there has been little work investigating feature design at sentence level. Due to the small number of words within a sentence, polarity classification at sentence level differs substantially from document-level classification in that resulting bag-of-words feature vectors tend to be very sparse resulting in a lower classification accuracy. In this paper, we show that performance can be improved by adding features specifically designed for sentence-level polarity classification. We consider both explicit polarity information and various linguistic features. A great proportion of the improvement that can be obtained by using polarity information can also be achieved by using a set of simple domain-independent linguistic features.},
  language  = {en},
}

@inproceedings{WiegandKlakow2011c,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {The Role of Predicates in Opinion Holder Extraction},
  booktitle = {Proceedings of the RANLP 2011 Workshop on Information Extraction and Knowledge Acquisition, 16 September, 2011, Hissar, Bulgaria},
  publisher = {Incoma Ltd.},
  address   = {Shoumen},
  isbn      = {978-954-452-018-2},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84564},
  pages     = {13--20},
  year      = {2011},
  abstract  = {In this paper, we investigate the role of predicates in opinion holder extraction. We will examine the shape of these predicates, investigate what relationship they bear towards opinion holders, determine what resources are potentially useful for acquiring them, and point out limitations of an opinion holder extraction system based on these predicates. For this study, we will carry out an evaluation on a corpus annotated with opinion holders. Our insights are, in particular, important for situations in which no labelled training data are available and only rule-based methods can be applied.},
  language  = {en},
}

@inproceedings{WiegandKlakow2013,
  author    = {Wiegand, Michael and Klakow, Dietrich},
  title     = {Towards Contextual Healthiness Classification of Food Items - A Linguistic Approach},
  booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing, October 14-18, 2013, Nagoya, Japan},
  publisher = {Asian Federation of Natural Language Processing},
  address   = {Nagoya},
  isbn      = {978-4-9907348-0-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-85012},
  pages     = {19--27},
  year      = {2013},
  abstract  = {We explore the feasibility of contextual healthiness classification of food items. We present a detailed analysis of the linguistic phenomena that need to be taken into consideration for this task based on a specially annotated corpus extracted from web forum entries. For automatic classification, we compare a supervised classifier and rule-based classification. Beyond linguistically motivated features that include sentiment information we also consider the prior healthiness of food items.},
  language  = {en},
}

@inproceedings{WiegandKlakow2013b,
  author       = {Wiegand, Michael and Klakow, Dietrich},
  title        = {Towards the Detection of Reliable Food-Health Relationships},
  booktitle    = {Proceedings of the Workshop on Language Analysis in Social Media, 13 June 2013, Atlanta, Georgia},
  publisher    = {Association for Computational Linguistics},
  address      = {Stroudsburg, PA},
  organization = {Association for Computational Linguistics},
  isbn         = {978-1-937284-47-3},
  url          = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-84660},
  pages        = {69--79},
  year         = {2013},
  abstract     = {We investigate the task of detecting reliable statements about food-health relationships from natural language texts. For that purpose, we created a specially annotated web corpus from forum entries discussing the healthiness of certain food items. We examine a set of task-specific features (mostly) based on linguistic insights that are instrumental in finding utterances that are commonly perceived as reliable. These features are incorporated in a supervised classifier and compared against standard features that are widely used for various tasks in natural language processing, such as bag of words, part-of speech and syntactic parse information.},
  language     = {en},
}

@inproceedings{WiegandRothKlakow2012c,
  author    = {Wiegand, Michael and Roth, Benjamin and Klakow, Dietrich},
  title     = {Web-Based Relation Extraction for the Food Domain},
  booktitle = {Natural Language Processing and Information Systems. Proceedings of the 17th International Conference on Applications of Natural Language to Information Systems, NLDB 2012, Groningen, The Netherlands, June 26-28, 2012},
  editor    = {Bouma, Gosse and Ittoo, Ashwin and M{\'e}tais, Elisabeth and Wortmann, Hans},
  series    = {Lecture Notes in Computer Science},
  number    = {7337},
  publisher = {Springer},
  address   = {Berlin [u.a.]},
  isbn      = {978-3-642-31177-2},
  doi       = {10.1007/978-3-642-31178-9_25},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:mh39-87454},
  pages     = {222--227},
  year      = {2012},
  abstract  = {In this paper, we examine methods to extract different domain-specific relations from the food domain. We employ different extraction methods ranging from surface patterns to co-occurrence measures applied on different parts of a document. We show that the effectiveness of a particular method depends very much on the relation type considered and that there is no single method that works equally well for every relation type. As we need to process a large amount of unlabeled data our methods only require a low level of linguistic processing. This has also the advantage that these methods can provide responses in real time.},
  language  = {en},
}