@incollection{SchmidtMarx2019,
  author    = {Schmidt, Axel and Marx, Konstanze},
  title     = {Multimodality as Challenge. {YouTube} Data in Linguistic Corpora},
  booktitle = {Multimodality. Disciplinary Thoughts and the Challenge of Diversity},
  editor    = {Wildfeuer, Janina and Pflaeging, Jana and Bateman, John and Seizov, Ognyan and Tseng, Chiao-I},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  year      = {2019},
  pages     = {115--143},
  isbn      = {978-3-11-060798-7},
  doi       = {10.1515/9783110608694},
  url       = {https://nbn-resolving.org/urn:nbn:de:bsz:mh39-94089},
  language  = {en},
  abstract  = {A large database is a desirable basis for multimodal analysis. The development of more elaborate methods, data banks, and tools for a stronger empirical grounding of multimodal analysis is a prevailing topic within multimodality. Prerequisite for this are corpora for multimodal data. Our contribution aims at developing a proposal for gathering and building multimodal corpora of audio-visual social media data, predominantly YouTube data. Our contribution has two parts: First we outline a participation framework which is able to represent the complexity of YouTube communication. To this end we ‘dissect’ the different communicative and multimodal layers YouTube consists of. Besides the video performance YouTube also integrates comments, social media operators, commercials, and announcements for further YouTube videos. The data consists of various media and modes and is interactively engaged in various discourses. Hence, it is rather difficult to decide what can be considered as a basic communicative unit (or a ‘turn’) and how it can be mapped. Another decision to be made is which elements are of higher priority than others, thus have to be integrated in an adequate transcription format. We illustrate our conceptual considerations on the example of so-called Let’s Plays, which are supposed to present and comment computer gaming processes. The second part is devoted to corpus building. Most previous studies either worked with ad hoc data samples or outlined data mining and data sampling strategies. Our main aim is to delineate in a systematic way and based on the conceptual outline in the first part necessary elements which should be part of a YouTube corpus. To this end we describe in a first step which components (e.g., the video itself, the comments, the metadata, etc.) should be captured. In a second step we outline why and which relations (e.g., screen appearances, hypertextual structures, etc.) are worth to get part of the corpus. In sum, our contribution aims at outlining a proposal for gathering and systematizing multimodal data, specifically audio-visual social media data, in a corpus derived from a conceptual modeling of important communicative processes of the research object itself.},
}