BibTeX
@inproceedings{adel2018domainindependent,
  author    = {Adel, Heike and Bostan, Laura Ana Maria and Papay, Sean and Padó, Sebastian and Klinger, Roman},
  title     = {{DERE}: A task and domain-independent slot filling framework for declarative relation extraction},
  booktitle = {Proceedings of EMNLP},
  pages     = {42--47},
  year      = {2018},
  address   = {Brussels, Belgium},
  url       = {https://aclweb.org/anthology/D18-2008.pdf},
  abstract  = {Most machine learning systems for natural language processing are tailored to specific tasks. As a result, comparability of models across tasks is missing and their applicability to new tasks is limited. This affects end users without machine learning experience as well as model developers. To address these limitations, we present DeRe, a novel framework for declarative specification and compilation of template-based information extraction. It uses a generic specification language for the task and for data annotations in terms of spans and frames. This formalism enables the representation of a large variety of natural language processing challenges. The backend can be instantiated by different models, following different paradigms. The clear separation of frame specification and model backend will ease the implementation of new models and the evaluation of different models across different tasks. Furthermore, it simplifies transfer learning, joint learning across tasks and/or domains as well as the assessment of model generalizability. DeRe is available as open source.}
}
@inproceedings{alagic18:_lever_lexic_subst_unsup_word_sense_induc,
  author    = {Alagi{\'c}, Domagoj and {\v{S}}najder, Jan and Padó, Sebastian},
  title     = {Leveraging Lexical Substitutes for Unsupervised Word Sense Induction},
  booktitle = {Proceedings of AAAI},
  year      = {2018},
  address   = {New Orleans, LA}
}
@inproceedings{alam21:_new_domain_major_effor,
  author    = {Alam, Touhidul and Zarcone, Alessandra and Padó, Sebastian},
  title     = {New Domain, Major Effort? {H}ow Much Data is Necessary to Adapt a Temporal Tagger To the Voice Assistant Domain},
  booktitle = {Proceedings of IWCS},
  pages     = {144--154},
  year      = {2021},
  address   = {Online},
  url       = {https://iwcs2021.github.io/proceedings/iwcs/pdf/2021.iwcs-1.14.pdf},
  keywords  = {conference myown},
  abstract  = {Reliable tagging of Temporal Expressions (TEs, e.g., Book a table at L'Osteria for Sunday evening) is a central requirement for Voice Assistants (VAs). However, there is a dearth of resources and systems for the VA domain, since publicly-available temporal taggers are trained only on substantially different domains, such as news and clinical text.
Since the cost of annotating large datasets is prohibitive, we investigate the trade-off between in-domain data and performance in DA-Time, a hybrid temporal tagger for the English VA domain which combines a neural architecture for robust TE recognition, with a parser-based TE normalizer. We find that transfer learning goes a long way even with as little as 25 in-domain sentences: DA-Time performs at the state of the art on the news domain, and substantially outperforms it on the VA domain.},
  added-at  = {2021-04-17T21:09:02.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/23370f1e6a50186bf152c32cae388eaa8/sp},
  interhash = {640672c5b09f9640eafc620ec084ebfe},
  intrahash = {3370f1e6a50186bf152c32cae388eaa8},
  timestamp = {2021-08-11T13:57:32.000+0200}
}
@inproceedings{andreou16:_instan_englis,
  author    = {Andreou, Marios and Kawaletz, Lea and Kisselew, Max and Lapesa, Gabriella and Padó, Sebastian and Plag, Ingo},
  title     = {Instance-based disambiguation of {E}nglish \textit{-ment} derivatives},
  booktitle = {Proceedings of the conference on cognitive structures: Linguistic, Philosophical and Psychological Perspectives},
  year      = {2016},
  address   = {D{\"u}sseldorf, Germany},
  keywords  = {myown abstract}
}
@article{apidianaki24:_languag_learn_repres_proces_human_machin,
  author    = {Apidianaki, Marianna and Fourtassi, Abdellah and Padó, Sebastian},
  title     = {Language Learning, Representation, and Processing in Humans and Machines: Introduction to the Special Issue},
  journal   = {Computational Linguistics},
  year      = {2024},
  url       = {https://doi.org/10.1162/coli_e_00539},
  keywords  = {article myown},
  note      = {Introduction to the Special Issue},
  added-at  = {2024-09-28T13:54:00.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2e7ccddb8d78164c13e58120d1022defe/sp},
  interhash = {64c576e7daf8554f4a2f304cd86632eb},
  intrahash = {e7ccddb8d78164c13e58120d1022defe},
  timestamp = {2024-09-28T13:54:00.000+0200}
}
@inproceedings{augenstein12:_lodif,
  author    = {Augenstein, Isabelle and Pad{ó}, Sebastian and Rudolph, Sebastian},
  title     = {{LODifier}: Generating {Linked Data} from Unstructured Text},
  booktitle = {Proceedings of ESWC 2012},
  year      = {2012},
  address   = {Heraklion, Greece}
}
@inproceedings{babych11:_depen_based_quest_valid_for_german,
  author    = {Babych, Svitlana and Henn, Alexander and Pawellek, Jan and Pad{ó}, Sebastian},
  title     = {Dependency-based Question Validation for {G}erman},
  booktitle = {CLEF Working Notes},
  year      = {2011},
  address   = {Amsterdam, Netherlands}
}
@inproceedings{baric24:_actor_ident_discour,
  author    = {Baric, Ana and Padó, Sebastian and Papay, Sean},
  title     = {Actor Identification in Discourse: {A} Challenge for {LLM}s?},
  booktitle = {Proceedings of the CODI workshop},
  year      = {2024},
  address   = {St Julian's, Malta},
  url       = {https://aclanthology.org/2024.codi-1.6}
}
@inproceedings{baroni17:_show,
  author    = {Baroni, Marco and Boleda, Gemma and Padó, Sebastian},
  title     = {{``Show me the cup''}: Reference with Continuous Representations},
  booktitle = {Proceedings of CICLing},
  year      = {2017},
  address   = {Budapest, Hungary},
  url       = {https://arxiv.org/pdf/1606.08777},
  keywords  = {conference myown},
  abstract  = {One of the most basic functions of language is to refer to objects in a shared scene. Modeling reference with continuous representations is challenging because it requires individuation, i.e., tracking and distinguishing an arbitrary number of referents. We introduce a neural network model that, given a definite description and a set of objects represented by natural images, points to the intended object if the expression has a unique referent, or indicates a failure, if it does not. The model, directly trained on reference acts, is competitive with a pipeline manually engineered to perform the same task, both when referents are purely visual, and when they are characterized by a combination of visual and linguistic properties.}
}
@inproceedings{benikova14:_germev_named_entit_recog_shared_task,
  author    = {Benikova, Darina and Biemann, Chris and Kisselew, Max and Pad{ó}, Sebastian},
  title     = {{GermEval} 2014 {Named Entity Recognition} Shared Task: Companion Paper},
  booktitle = {Proceedings of the KONVENS GermEval workshop},
  pages     = {104--112},
  year      = {2014},
  address   = {Hildesheim, Germany}
}
@inproceedings{blessing19:_envir_relat_annot_polit_debat,
  author    = {Blessing, Andre and Blokker, Nico and Haunss, Sebastian and Kuhn, Jonas and Lapesa, Gabriella and Padó, Sebastian},
  title     = {An Environment for the Relational Annotation of Political Debates},
  booktitle = {Proceedings of ACL System Demonstrations},
  year      = {2019},
  address   = {Florence, Italy},
  keywords  = {conference myown}
}
@article{blokker:_between,
  author    = {Blokker, Nico and Blessing, Andre and Dayanik, Erenay and Kuhn, Jonas and Pad{ó}, Sebastian and Lapesa, Gabriella},
  title     = {Between welcome culture and border fence: The {E}uropean refugee crisis in {G}erman newspaper reports},
  journal   = {Language Resources and Evaluation},
  volume    = {57},
  pages     = {121--153},
  year      = {2023},
  url       = {https://doi.org/10.1007/s10579-023-09641-8},
  keywords  = {article myown},
  abstract  = {Newspaper reports provide a rich source of information on the unfolding of public debates, which can serve as basis for inquiry in political science. Such debates are often triggered by critical events, which attract public attention and incite the reactions of political actors: crisis sparks the debate. However, due to the challenges of reliable annotation and modeling, few large-scale datasets with high-quality annotation are available. This paper introduces DebateNet2.0, which traces the political discourse on the 2015 European refugee crisis in the German quality newspaper taz. The core units of our annotation are political claims (requests for specific actions to be taken) and the actors who advance them (politicians, parties, etc.). Our contribution is twofold. First, we document and release DebateNet2.0 along with its companion R package, mardyR. Second, we outline and apply a Discourse Network Analysis (DNA) to DebateNet2.0, comparing two crucial moments of the policy debate on the ``refugee crisis'': the migration flux through the Mediterranean in April/May and the one along the Balkan route in September/October. We guide the reader through the methods involved in constructing a discourse network from a newspaper, demonstrating that there is not one single discourse network for the German migration debate, but multiple ones, depending on the research question through the associated choices regarding political actors, policy fields and time spans.},
  added-at  = {2021-11-22T08:09:12.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2c17511a37e3ad0d3d416c2b7041e972d/sp},
  interhash = {1942e48042e3ead163e5b2cd226baec5},
  intrahash = {c17511a37e3ad0d3d416c2b7041e972d},
  timestamp = {2023-05-11T17:11:35.000+0200}
}
@inproceedings{blokker22:_why_justif_claim_matter_under_party_posit,
  author    = {Blokker, Nico and Ceron, Tanise and Blessing, Andre and Dayanik, Erenay and Haunss, Sebastian and Kuhn, Jonas and Lapesa, Gabriella and Padó, Sebastian},
  title     = {Why Justifications of Claims Matter for Understanding Party Positions},
  booktitle = {Proceedings of the 2nd Workshop on Computational Linguistics for Political Text Analysis},
  year      = {2022},
  url       = {https://old.gscl.org/media/pages/arbeitskreise/cpss/cpss-2022/workshop-proceedings-2022/254133848-1662996909/cpss-2022-proceedings.pdf},
  keywords  = {myown workshop}
}
@inproceedings{blokker20:_swimm_tide,
  author    = {Blokker, Nico and Dayanik, Erenay and Lapesa, Gabriella and Padó, Sebastian},
  title     = {Swimming with the Tide? Positional Claim Detection across Political Text Types},
  booktitle = {Proceedings of the {NLP+CSS} workshop},
  pages     = {24--34},
  year      = {2020},
  address   = {Online},
  url       = {https://www.aclweb.org/anthology/2020.nlpcss-1.3/},
  keywords  = {myown workshop},
  added-at  = {2020-10-02T09:43:59.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2c8caad10654ddbc031a19634a4204f7a/sp},
  interhash = {9399eb41d74c6c2be84a191bfa6d1885},
  intrahash = {c8caad10654ddbc031a19634a4204f7a},
  timestamp = {2020-11-11T21:06:25.000+0100}
}
@inproceedings{boleda17:_instan_concep_distr_space,
  author    = {Boleda, Gemma and Gupta, Abhijeet and Pad{ó}, Sebastian},
  title     = {Instances and concepts in distributional space},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics},
  pages     = {79--85},
  year      = {2017},
  month     = apr,
  address   = {Valencia, Spain},
  url       = {https://www.aclweb.org/anthology/E17-2013.pdf},
  abstract  = {Instances (``Mozart'') are ontologically distinct from concepts or classes (``composer''). Natural language encompasses both, but instances have received comparatively little attention in distributional semantics. Our results show that instances and concepts differ in their distributional properties. We also establish that instantiation detection (``Mozart -- composer'') is generally easier than hypernymy detection (``chemist -- scientist''), and that results on the influence of input representation do not transfer from hyponymy to instantiation.},
  interhash = {4fccc5bec96863e48b0aa1efc00cf904},
  intrahash = {572c75d2a71ba7d51970dc1256c905e3}
}
@inproceedings{boleda17:_livin_discr_life_in_contin_world,
  author    = {Boleda, Gemma and Padó, Sebastian and Pham, Nghia The and Baroni, Marco},
  title     = {Living a discrete life in a continuous world: Reference in cross-modal entity tracking},
  booktitle = {Proceedings of IWCS},
  year      = {2017},
  address   = {Montpellier, France},
  url       = {https://www.aclweb.org/anthology/W/W17/W17-6904.pdf},
  keywords  = {conference myown}
}
@inproceedings{boleda12:regular,
  author        = {Boleda, Gemma and Pad{ó}, Sebastian and Utt, Jason},
  title         = {Regular polysemy: A distributional model},
  booktitle     = {Proceedings of {*}SEM 2012},
  year          = {2012},
  address       = {Montreal, Canada},
  date-added    = {2013-05-22 15:04:55 +0000},
  date-modified = {2013-05-22 15:04:55 +0000}
}
@inproceedings{Braun2018,
  author    = {Braun, Manuel and Klinger, Roman and Padó, Sebastian and Viehhauser, Gabriel},
  title     = {{Digitale Modellierung von Figurenkomplexität am Beispiel des Parzival von Wolfram von Eschenbach}},
  booktitle = {Book of Abstracts -- Digital Humanities im deutschsprachigen Raum},
  year      = {2018},
  month     = mar,
  address   = {Cologne, Germany},
  url       = {https://www.romanklinger.de/publications/BraunKlingerPadoViehhauser2018.html},
  pdf       = {https://www.romanklinger.de/publications/viehhauser2018dhd.pdf}
}
@inproceedings{buljan18:_lexic_subst_evaluat_compos_distr_model,
  author    = {Buljan, Maja and Padó, Sebastian and {\v{S}}najder, Jan},
  title     = {Lexical Substitution for Evaluating Compositional Distributional Models},
  booktitle = {Proceedings of NAACL},
  pages     = {206--211},
  year      = {2018},
  address   = {New Orleans, LA},
  url       = {https://aclweb.org/anthology/N18-2033.pdf},
  keywords  = {conference myown},
  note      = {Acceptance rate: 29\%},
  abstract  = {Compositional Distributional Semantic Models (CDSMs) model the meaning of phrases and sentences in vector space. They have been predominantly evaluated on limited, artificial tasks such as semantic sentence similarity on hand-constructed datasets. This paper argues for lexical substitution as a means to evaluate CDSMs. Lexical substitution is a more natural task, enables us to evaluate meaning composition at the level of individual words, and provides a common ground to compare CDSMs with dedicated lexical substitution models. We create a lexical substitution dataset for CDSM evaluation from an English-language corpus with manual ``all-words'' lexical substitution annotation. Our experiments indicate that the Practical Lexical Function CDSM outperforms simple component-wise CDSMs and performs on par with the context2vec lexical substitution model using the same context.}
}
@incollection{burchardtetal:forth,
  author    = {Burchardt, Aljoscha and Erk, Katrin and Frank, Anette and Kowalski, Andrea and Pad{ó}, Sebastian and Pinkal, Manfred},
  title     = {Using {FrameNet} for the Semantic Analysis of {G}erman: Annotation, Representation, and Automation},
  booktitle = {Multilingual FrameNets -- Practice and Applications},
  editor    = {Boas, Hans C.},
  publisher = {Mouton de Gruyter},
  address   = {Berlin},
  pages     = {209--244},
  year      = {2009}
}
@article{burchardt08:constructing,
  author  = {Burchardt, Aljoscha and Pad{ó}, Sebastian and Spohr, Dennis and Frank, Anette and Heid, Ulrich},
  title   = {Constructing Integrated Corpus and Lexicon Models for Multi-Layer Annotations in {OWL DL}},
  journal = {Linguistic Issues in Language Technology},
  volume  = {1},
  number  = {1},
  pages   = {1--33},
  year    = {2008}
}
@inproceedings{burchardt08:_formal_multi_corpor_owl_dl,
  author    = {Burchardt, Aljoscha and Pad{ó}, Sebastian and Spohr, Dennis and Frank, Anette and Heid, Ulrich},
  title     = {Formalising Multi-layer Corpora in {OWL DL} -- Lexicon Modelling, Querying and Consistency Control},
  booktitle = {Proceedings of IJCNLP},
  year      = {2008},
  address   = {Hyderabad, India}
}
@inproceedings{ceron24:_autom_analy_polit_debat_manif,
  author    = {Ceron, Tanise and Baric, Ana and Blessing, André and Haunss, Sebastian and Kuhn, Jonas and Lapesa, Gabriella and Padó, Sebastian and Papay, Sean and Zauchner, Patricia},
  title     = {Automatic Analysis of Political Debates and Manifestos: Successes and Challenges},
  booktitle = {Proceedings of the 1st International Conference on Robust Argumentation Machines},
  series    = {Lecture Notes in Computer Science},
  volume    = {14638},
  publisher = {Springer},
  year      = {2024},
  address   = {Bielefeld, Germany},
  url       = {https://doi.org/10.1007/978-3-031-63536-6_5},
  keywords  = {conference myown},
  added-at  = {2024-02-09T10:41:00.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2c3b7e7e31de03d009b076a3621ca96f1/sp},
  interhash = {2bdc64ca90d70bb4cca43d02f06f3ff1},
  intrahash = {c3b7e7e31de03d009b076a3621ca96f1},
  timestamp = {2024-07-16T09:05:26.000+0200}
}
@inproceedings{ceron22:_optim,
  author    = {Ceron, Tanise and Blokker, Nico and Padó, Sebastian},
  title     = {Optimizing text representations to capture (dis)similarity between political parties},
  booktitle = {Proceedings of CoNLL},
  year      = {2022},
  keywords  = {conference myown},
  note      = {Accepted for publication}
}
@article{ceronetal24,
  author  = {Ceron, Tanise and Falk, Neele and Baric, Ana and Nikolaev, Dmitry and Padó, Sebastian},
  title   = {Beyond prompt brittleness: {E}valuating the reliability and consistency of political worldviews in {LLM}s},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2024},
  url     = {https://arxiv.org/abs/2402.17649},
  comment = {Accepted for publication}
}
@inproceedings{ceron23:_addit,
  author    = {Ceron, Tanise and Nikolaev, Dmitry and Pad{ó}, Sebastian},
  title     = {Additive manifesto decomposition: {A} policy domain aware method for understanding party positioning},
  booktitle = {Findings of ACL},
  year      = {2023},
  address   = {Toronto, Canada},
  url       = {https://aclanthology.org/2023.findings-acl.499/},
  keywords  = {conference myown},
  abstract  = {Automatic extraction of party (dis)similarities from texts such as party election manifestos or parliamentary speeches plays an increasing role in computational political science. However, existing approaches are fundamentally limited to targeting only global party (dis)similarity: they condense the relationship between a pair of parties into a single figure, their similarity. In aggregating over all policy domains (e.g., health or foreign policy), they do not provide any qualitative insights into which domains parties agree or disagree on.
This paper proposes a workflow for estimating policy domain aware party similarity that overcomes this limitation. The workflow covers (a) definition of suitable policy domains; (b) automatic labeling of domains, if no manual labels are available; (c) computation of domain-level similarities and aggregation at a global level; (d) extraction of interpretable party positions on major policy axes via multidimensional scaling. We evaluate our workflow on manifestos from the German federal elections. We find that our method (a) yields high correlation when predicting party similarity at a global level and (b) provides accurate party-specific positions, even with automatically labelled policy domains.},
  added-at  = {2023-05-02T15:14:08.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2c802a4bcaa362732985cfbe6b4ab376e/sp},
  interhash = {c15728109a92b2c1f84c603ca1c5249b},
  intrahash = {c802a4bcaa362732985cfbe6b4ab376e},
  timestamp = {2023-07-10T16:07:05.000+0200}
}
@article{culo08:comparing,
  author   = {{\v{C}}ulo, Oliver and Erk, Katrin and Pad{ó}, Sebastian and {Schulte im Walde}, Sabine},
  title    = {Comparing and Combining Semantic Verb Classifications},
  journal  = {Language Resources and Evaluation},
  volume   = {42},
  number   = {3},
  pages    = {265--291},
  year     = {2008},
  keywords = {Gramotron}
}
@inproceedings{dagan14:_excit_open_platf_textual_infer,
  author    = {Dagan, Ido and Levy, Omer and Magnini, Bernardo and Noh, Tae-Gil and Pad{ó}, Sebastian and Stern, Asher and Zanoli, Roberto},
  title     = {The {EXCITEMENT Open Platform} for Textual Inferences},
  booktitle = {Proceedings of ACL (Demonstration Papers)},
  pages     = {43--48},
  year      = {2014},
  address   = {Baltimore, MD},
  url       = {https://www.aclweb.org/anthology/P/P14/P14-5008}
}
@inproceedings{dayanik21:_using_hierar_class_struc_improv,
  author    = {Dayanik, Erenay and Blessing, Andre and Blokker, Nico and Haunss, Sebastian and Kuhn, Jonas and Lapesa, Gabriella and Padó, Sebastian},
  title     = {Using Hierarchical Class Structure to Improve Fine-Grained Claim Classification},
  booktitle = {Proceedings of the ACL Workshop of Structured Prediction},
  year      = {2021},
  address   = {Bangkok, Thailand},
  url       = {https://aclanthology.org/2021.spnlp-1.6/},
  keywords  = {myown workshop},
  added-at  = {2021-06-01T20:41:13.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/20f6d2fc4aa639e7210990201291d5a5c/sp},
  interhash = {dd2830015fb7948b28906be22cd03a64},
  intrahash = {0f6d2fc4aa639e7210990201291d5a5c},
  timestamp = {2021-08-05T19:17:39.000+0200}
}
@inproceedings{dayanik22:improving,
  author    = {Dayanik, Erenay and Blessing, Andre and Blokker, Nico and Haunss, Sebastian and Kuhn, Jonas and Lapesa, Gabriella and Padó, Sebastian},
  title     = {Improving Neural Political Statement Classification with Class Hierarchical Information},
  booktitle = {Findings of ACL},
  pages     = {2367--2382},
  year      = {2022},
  address   = {Dublin, Ireland},
  url       = {https://aclanthology.org/2022.findings-acl.186},
  keywords  = {conference myown preprint},
  note      = {Acceptance rate: 31.4\%}
}
@inproceedings{dayanik20:_maskin_actor_infor_leads_fairer,
  author    = {Dayanik, Erenay and Padó, Sebastian},
  title     = {Masking Actor Information Leads to Fairer Political Claims Detection},
  booktitle = {Proceedings of ACL},
  pages     = {4385--4391},
  year      = {2020},
  address   = {Online},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.404/},
  keywords  = {conference myown},
  abstract  = {A central concern in Computational Social Sciences (CSS) is fairness: where the role of NLP is to scale up text analysis to large corpora, the quality of automatic analyses should be as independent as possible of textual properties. We analyze the performance of a state-of-the-art neural model on the task of political claims detection (i.e., the identification of forward-looking statements made by political actors) and identify a strong frequency bias: claims made by frequent actors are recognized better. We propose two simple debiasing methods which mask proper names and pronouns during training of the model, thus removing personal information bias. We find that (a) these methods significantly decrease frequency bias while keeping the overall performance stable; and (b) the resulting models improve when evaluated in an out-of-domain setting.},
  added-at  = {2020-04-04T15:05:44.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/23293989e68e3eda1db5018a5ac18dee4/sp},
  interhash = {00061c6a1cf89353a1c20cb29b483974},
  intrahash = {3293989e68e3eda1db5018a5ac18dee4},
  timestamp = {2020-12-07T16:42:14.000+0100}
}
@inproceedings{dayanik21:_disen_docum_topic_author_gender_multip_languag,
  author    = {Dayanik, Erenay and Padó, Sebastian},
  title     = {Disentangling Document Topic and Author Gender in Multiple Languages: Lessons for Adversarial Debiasing},
  booktitle = {Proceedings of the EACL WASSA workshop},
  pages     = {40--49},
  year      = {2021},
  url       = {https://www.aclweb.org/anthology/2021.wassa-1.6},
  keywords  = {myown workshop},
  added-at  = {2021-02-23T10:08:29.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/29f3e2e70efa78c0dd97ae2f4b2f071ac/sp},
  interhash = {e172465d2cf0dd67ab27a810d641f629},
  intrahash = {9f3e2e70efa78c0dd97ae2f4b2f071ac},
  timestamp = {2021-04-21T15:24:46.000+0200}
}
@article{dayanik22:_better_under_bias_nlp_model,
  author   = {Dayanik, Erenay and Vu, Thang and Padó, Sebastian},
  title    = {Bias Identification and Attribution in {NLP} Models with Regression and Effect Sizes},
  journal  = {Northern European Journal of Language Technology},
  volume   = {8},
  number   = {1},
  year     = {2022},
  url      = {https://doi.org/10.3384/nejlt.2000-1533.2022.3505},
  keywords = {article myown}
}
@inproceedings{marneffe09:_multi_word_expres_in_textual_entail,
  author    = {de Marneffe, Marie-Catherine and Pad{ó}, Sebastian and Manning, Christopher D.},
  title     = {Multi-word expressions in Textual Entailment: Much ado about nothing?},
  booktitle = {Proceedings of the ACL TextInfer workshop},
  year      = {2009},
  address   = {Singapore}
}
@incollection{b.11:_machin_trans_evaluat_and_optim,
  author    = {Dorr, B. and Al-Onaizan, Y. and Galley, M. and Habash, N. and Jones, D. and Kulick, S. and Lavie, A. and Leusch, G. and Madnani, N. and Manning, C. and Marcus, M. and Mauser, A. and Ostendorf, M. and Pad{ó}, S. and Przybocki, M. and Rosti, A. and Schwartz, R. and Snover, M. and Tate, C. and Vogel, S. and Voss, C.},
  title     = {Machine Translation Evaluation and Optimization},
  booktitle = {Handbook of Natural Language Processing and Machine Translation: DARPA Global Autonomous Language Exploitation},
  editor    = {Olive, Joseph and Christianson, Caitlin and McCary, Jon},
  publisher = {Springer},
  year      = {2011}
}
@article{Ehrlicher2019,
  author       = {Ehrlicher, Hanno and Klinger, Roman and Lehmann, Jörg and Padó, Sebastian},
  title        = {Measuring Historical Emotions and Their Evolution: An Interdisciplinary Endeavour to Investigate The `Emotions of Encounter'},
  journal      = {Laboratório Interdisciplinar sobre Informa{\c{c}}{\~a}o e Conhecimento em revista (Liinc em revista)},
  volume       = {15},
  number       = {1},
  year         = {2019},
  doi          = {10.18617/liinc.v15i1.4557},
  pdf          = {https://www.romanklinger.de/publications/EhrlicherEtAl2019.pdf},
  entrysubtype = {journal}
}
@inproceedings{erk05:_analy,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {Analysing models for semantic role assignment using confusability},
  booktitle = {Proceedings of HLT/EMNLP 2005},
  year      = {2005},
  address   = {Vancouver, BC}
}
@inproceedings{erk:_shalm,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {Shalmaneser - a flexible toolbox for semantic role assignment},
  booktitle = {Proceedings of {LREC} 2006},
  year      = {2006},
  address   = {Genoa, Italy}
}
@inproceedings{erk07:_towar_comput_model_gradien_word_sense,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {Towards a Computational Model of Gradience in Word Sense},
  booktitle = {Proceedings of IWCS-7},
  year      = {2007},
  address   = {Tilburg, The Netherlands}
}
@inproceedings{erk08:_struc_vector_space_model_word_meanin_contex,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {A Structured Vector Space Model for Word Meaning in Context},
  booktitle = {Proceedings of EMNLP},
  year      = {2008},
  address   = {Honolulu, HI}
}
@inproceedings{erk09:_parap_asses_in_struc_vector_space,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {Paraphrase assessment in structured vector space: Exploring parameters and datasets},
  booktitle = {Proceedings of the EACL Workshop on Geometrical Methods for Natural Language Semantics},
  year      = {2009},
  address   = {Athens, Greece}
}
@inproceedings{erk10:_exemp_based_model_for_word,
  author    = {Erk, Katrin and Pad{ó}, Sebastian},
  title     = {Exemplar-Based Models for Word Meaning In Context},
  booktitle = {Proceedings of ACL 2010},
  year      = {2010},
  address   = {Uppsala, Sweden}
}
@article{erk:_flexib_corpus_driven_model_of,
  author  = {Erk, Katrin and Pad{ó}, Sebastian and Pad{ó}, Ulrike},
  title   = {A Flexible, Corpus-driven Model of Regular and Inverse Selectional Preferences},
  journal = {Computational Linguistics},
  volume  = {36},
  number  = {4},
  pages   = {723--763},
  year    = {2010}
}
@inproceedings{faruqui11:_sound_based_trans_correc_in_urdu,
  author    = {Faruqui, Manaal and Majumdar, Prasenjit and Pad{ó}, Sebastian},
  title     = {Soundex-based Translation Correction in {Urdu-English} Cross-Language Information Retrieval},
  booktitle = {Proceedings of the IJCNLP Workshop on Cross-Lingual Information Retrieval},
  year      = {2011},
  address   = {Chiang Mai, Thailand}
}
@inproceedings{faruqui10:_training,
  author    = {Faruqui, Manaal and Pad{ó}, Sebastian},
  title     = {Training and Evaluating a {G}erman Named Entity Recognizer with Semantic Generalization},
  booktitle = {Proceedings of KONVENS 2010},
  year      = {2010},
  address   = {Saarbr{ü}cken, Germany}
}
@inproceedings{faruqui11:_acquir_entail_pairs_acros_languag_and_domain,
  author    = {Faruqui, Manaal and Pad{ó}, Sebastian},
  title     = {Acquiring entailment pairs across languages and domains: A Data Analysis},
  booktitle = {Proceedings of IWCS 2011},
  year      = {2011},
  address   = {Oxford, UK}
}
@inproceedings{faruqui-pado:2011:acl-hlt2011,
  author    = {Faruqui, Manaal and Pad{ó}, Sebastian},
  title     = {{``I Thou Thee, Thou Traitor''}: Predicting Formal vs. Informal Address in {E}nglish Literature},
  booktitle = {Proceedings of ACL/HLT 2011},
  pages     = {467--472},
  year      = {2011},
  address   = {Portland, OR}
}
@inproceedings{faruqui12:_towar_model_of_formal_and,
  author    = {Faruqui, Manaal and Pad{ó}, Sebastian},
  title     = {Towards a model of formal and informal address in {E}nglish},
  booktitle = {Proceedings of EACL 2012},
  year      = {2012},
  address   = {Avignon, France}
}
@inproceedings{feizabadi12:_autom_ident_motion_verbs_wordn,
  author    = {Feizabadi, Parvin Sadat and Pad{ó}, Sebastian},
  title     = {Automatic Identification of Motion Verbs in {WordNet} and {FrameNet} for Locational Inference},
  booktitle = {Proceedings of KONVENS},
  year      = {2012},
  address   = {Vienna, Austria}
}
@inproceedings{feizabadi15:_combin_seemin_incom_corpor_implic,
  author    = {Feizabadi, Parvin Sadat and Pad{ó}, Sebastian},
  title     = {Combining Seemingly Incompatible Corpora for Implicit Semantic Role Labeling},
  booktitle = {Proceedings of STARSEM},
  pages     = {40--50},
  year      = {2015},
  address   = {Denver, CO},
  url       = {https://www.aclweb.org/anthology/S/S15/S15-1005}
}
@incollection{frank09:_seman_comput_lexic,
  author    = {Frank, Anette and Pad{ó}, Sebastian},
  title     = {Semantics in Computational Lexicons},
  booktitle = {Handbook of Semantics},
  editor    = {Maienborn, Claudia and von Heusinger, Klaus and Portner, Paul},
  volume    = {3},
  publisher = {Mouton de Gruyter},
  year      = {2012}
}
@inproceedings{frontini14:_polys,
  author    = {Frontini, Francesca and Quochi, Valeria and Pad{ó}, Sebastian and Monachini, Monica and Utt, Jason},
  title     = {Polysemy index for nouns: an experiment on {Italian} using the {PAROLE SIMPLE CLIPS} lexical database},
  booktitle = {Proceedings of LREC},
  year      = {2014},
  address   = {Reykjav{í}k, Iceland}
}
@inproceedings{gorzitze12:_corpus_acquis_german_event_objec_denot_nouns,
  author    = {Gorzitze, Stefan and Pad{ó}, Sebastian},
  title     = {Corpus-based Acquisition of {G}erman Event- and Object-Denoting Nouns},
  booktitle = {Proceedings of KONVENS},
  year      = {2012},
  address   = {Vienna, Austria}
}
@inproceedings{gupta15:_distr,
  author    = {Gupta, Abhijeet and Boleda, Gemma and Baroni, Marco and Pad{ó}, Sebastian},
  title     = {Distributional vectors encode referential attributes},
  booktitle = {Proceedings of EMNLP},
  year      = {2015},
  address   = {Lisbon, Portugal},
  url       = {https://www.aclweb.org/anthology/D/D15/D15-1002}
}
@inproceedings{abhijeet15:_mappin,
  author    = {Gupta, Abhijeet and Boleda, Gemma and Baroni, Marco and Pad{ó}, Sebastian},
  title     = {Mapping conceptual features to referential properties},
  booktitle = {Proceedings of the 3rd international {ESSENCE} workshop: Algorithms for processing meaning},
  year      = {2015},
  address   = {Barcelona, Spain}
}
@inproceedings{gupta17:_distr_predic_relat_entit,
  author    = {Gupta, Abhijeet and Boleda, Gemma and Padó, Sebastian},
  title     = {Distributed Prediction of Relations for Entities: The Easy, The Difficult, and The Impossible},
  booktitle = {Proceedings of STARSEM},
  pages     = {104--109},
  year      = {2017},
  address   = {Vancouver, BC},
  url       = {https://www.aclweb.org/anthology/S17-1012.pdf},
  keywords  = {conference myown},
  note      = {Acceptance rate: 36\%}
}
@inproceedings{gupta15:_dissec_pract_lexic_funct_model,
  author    = {Gupta, Abhijeet and Utt, Jason and Pad{ó}, Sebastian},
  title     = {Dissecting the Practical Lexical Function Model for Compositional Distributional Semantics},
  booktitle = {Proceedings of STARSEM},
  pages     = {153--158},
  year      = {2015},
  address   = {Denver, CO},
  url       = {https://www.aclweb.org/anthology/S15-1017}
}
@inproceedings{hajic09:_conll_shared_task,
  author    = {Haji{\v{c}}, Jan and Ciaramita, Massimiliano and Johansson, Richard and Kawahara, Daisuke and Mart{í}, Maria A. and M{à}rquez, Llu{í}s and Meyers, Adam and Nivre, Joakim and Pad{ó}, Sebastian and {\v{S}}t{\v{e}}p{á}nek, Jan and Stra{\v{n}}{á}k, Pavel and Surdeanu, Mihai and Xue, Nianwen and Zhang, Yi},
  title     = {The {CoNLL-2009} Shared Task: Syntactic and Semantic Dependencies in Multiple Languages},
  booktitle = {Proceedings of CoNLL-2009},
  year      = {2009},
  address   = {Boulder, CO}
}
@inproceedings{han24:_towar_under_relat_in_learn_compos_gener,
  author    = {Sungjun Han and Sebastian Padó},
  title     = {Towards Understanding the Relationship between In-context Learning and Compositional Generalization},
  booktitle = {Proceedings of {LREC-COLING}},
  address   = {Torino, Italy},
  year      = {2024},
  url       = {https://aclanthology.org/2024.lrec-main.1449/},
}
@article{haunss20:_integ_manual_autom_annot_creat,
  author    = {Haunss, Sebastian and Kuhn, Jonas and Padó, Sebastian and Blessing, Andre and Blokker, Nico and Dayanik, Erenay and Lapesa, Gabriella},
  title     = {Integrating Manual and Automatic Annotation for the Creation of Discourse Network Data Sets},
  journal   = {Politics and Governance},
  volume    = {8},
  number    = {2},
  year      = {2020},
  url       = {https://dx.doi.org/10.17645/pag.v8i2.2591},
  keywords  = {article myown},
  abstract  = {This article investigates the integration of machine learning in the political claim annotation workflow with the goal to partially automate the annotation and analysis of large text corpora. It introduces the MARDY annotation environment and presents results from an experiment in which the annotation quality of annotators with and without machine learning based annotation support is compared. The design and setting aim to measure and evaluate: a) annotation speed; b) annotation quality; and c) applicability to the use case of discourse network generation. While the results indicate only slight increases in terms of annotation speed, the authors find a moderate boost in annotation quality. Additionally, with the help of manual annotation of the actors and filtering out of the false positives, the machine learning based annotation suggestions allow the authors to fully recover the core network of the discourse as extracted from the articles annotated during the experiment. This is due to the redundancy which is naturally present in the annotated texts. Thus, assuming a research focus not on the complete network but the network core, an AI-based annotation can provide reliable information about discourse networks with much less human intervention than compared to the traditional manual approach.},
  added-at  = {2020-03-23T20:19:19.000+0100},
  timestamp = {2020-06-02T16:24:19.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/24dcbda0eb92af231eef2b033d8d51b23/sp},
  interhash = {31dfa8365630383946e268e14bca9968},
  intrahash = {4dcbda0eb92af231eef2b033d8d51b23},
}
@inproceedings{heid/etal:04a,
  author       = {Ulrich Heid and Holger Voormann and Jan-Torsten Milde and Ulrike
                  Gut and Katrin Erk and Sebastian Pad{ó}},
  title        = {Querying both time-aligned and hierarchical corpora with {NXT Search}},
  booktitle    = {Proceedings of LREC-2004},
  year         = {2004},
  pages        = {1455--1459},
  address      = {Lisbon, Portugal},
  organization = {LREC},
  keywords     = {corpus query, XML corpora, cross-level observations, corpora tools,
                  querying},
  pdf          = {https://www.ims.uni-stuttgart.de/~holger/publications/lrec04nxtsearch.pdf},
}
@inproceedings{iris09:_semev_task,
  author    = {Iris Hendrickx and Su Nam Kim and Zornitsa Kozareva and Preslav Nakov
               and {Ó} S{é}aghdha, Diarmuid and Sebastian Pad{ó} and Marco
               Pennacchiotti and Lorenza Romano and Stan Szpakowicz},
  title     = {{SemEval}-2010 Task 8: Multi-Way Classification of Semantic Relations
               Between Pairs of Nominals},
  booktitle = {Proceedings of the NAACL Workshop on Semantic Evaluations: Recent
               Achievements and Future Directions},
  year      = {2009},
  address   = {Boulder, CO},
}
@inproceedings{hendrickx10:_multi_way_class_of_seman,
  author    = {Iris Hendrickx and Su Nam Kim and Zornitsa Kozareva and Preslav Nakov
               and {Ó} S{é}aghdha, Diarmuid and Sebastian Pad{ó} and Marco
               Pennacchiotti and Lorenza Romano and Stan Szpakowicz},
  title     = {Multi-Way Classification of Semantic Relations Between Pairs of Nominals},
  booktitle = {Proceedings of the 5th SIGLEX Workshop on Semantic Evaluation},
  year      = {2010},
  address   = {Uppsala, Sweden},
}
@inproceedings{hole19:_distr_analy_funct_words,
  author    = {Daniel Hole and Sebastian Padó},
  title     = {Distributional Analysis of Function Words},
  booktitle = {Proceedings of the 13th International Tbilisi Symposium on Language, Logic and Computation},
  address   = {Batumi, Georgia},
  year      = {2019},
  url       = {https://www.nlpado.de/~sebastian/pub/papers/tbillc19_hole.pdf},
  keywords  = {myown abstract},
}
@inproceedings{kim17:_inves_relat_liter_genres_emotion_plot_devel,
  author    = {Evgeny Kim and Sebastian Padó and Roman Klinger},
  title     = {Investigating the Relationship between Literary Genres and Emotional Plot Development},
  booktitle = {Proceedings of the ACL LaTeCH-CLfL workshop},
  year      = 2017,
  address   = {Vancouver, BC},
  keywords  = {workshop myown},
  url       = {https://www.aclweb.org/anthology/W17-2203.pdf},
  abstract  = {Literary genres are commonly viewed as being defined in terms of
               content and stylistic features. In this paper, we focus on one
               particular class of lexical features, namely emotion
               information, and investigate the hypothesis that emotion-related
               information correlates with particular genres. Using genre
               classification as a testbed, we compare a model that computes
               lexicon-based emotion scores globally for complete stories
               with a model that tracks emotion arcs through stories on a
               subset of Project Gutenberg with five genres.
               Our main findings are: (a), the global emotion model is competitive
               with a large-vocabulary bag-of-words genre classifier (80\% F1);
               (b), the emotion arc model shows a lower performance (59\% F1) but
               shows complementary behavior to the global model, as indicated by
               very good performance of an oracle ensemble (94\% F1); (c), genres
               differ in the extent to which stories follow the same emotional
               arcs, with particularly uniform behavior for anger (mystery) and
               fear (adventures, romance, humor, science fiction).},
}
@inproceedings{Kim2017,
  author       = {Evgeny Kim and Sebastian Padó and Roman Klinger},
  title        = {Prototypical Emotion Developments in Literary Genres},
  booktitle    = {Digital Humanities 2017: Conference Abstracts},
  year         = {2017},
  month        = aug,
  address      = {Montréal, Canada},
  organization = {McGill University and Université de Montréal},
  url          = {https://www.romanklinger.de/publications/kim2017.pdf},
  pdf          = {https://dh2017.adho.org/abstracts/203/203.pdf},
}
@proceedings{kim11:_proceed_of_acl_works_relat,
  title   = {Proceedings of the ACL Workshop on Relational Models of Semantics},
  year    = {2011},
  editor  = {S. Kim and Z. Kozareva and P. Nakov and {Ó} S{é}aghdha, D. and
             S. Pad{ó} and S. Szpakowicz},
  address = {Portland, OR},
}
@inproceedings{kisselew15:_obtain_better_under_distr_model,
  author    = {Max Kisselew and Sebastian Pad{ó} and Alexis Palmer and Jan {Š}najder},
  title     = {Obtaining a Better Understanding of Distributional Models of {G}erman
               Derivational Morphology},
  booktitle = {Proceedings of IWCS},
  year      = {2015},
  pages     = {58--63},
  address   = {London, UK},
  url       = {https://www.aclweb.org/anthology/W15-0108},
}
@inproceedings{kisselew16:_predic_direc_deriv_englis_conver,
  abstract  = {Conversion is a word formation operation that changes the grammatical category of a word in the absence of overt morphology. Conversion is extremely productive in English (e.g., tunnel, talk). This paper investigates whether distributional information can be used to predict the diachronic direction of conversion for homophonous noun--verb pairs. We aim to predict, for example, that tunnel was used as a noun prior to its use as a verb. We test two hypotheses: (1) that derived forms are less frequent than their bases, and (2) that derived forms are more semantically specific than their bases, as approximated by information theoretic measures. We find that hypothesis (1) holds for N-to-V conversion, while hypothesis (2) holds for V-to-N conversion. We achieve the best overall account of the historical data by taking both frequency and semantic specificity into account. These results provide a new perspective on linguistic theories regarding the semantic specificity of derivational morphemes, and on the morphosyntactic status of conversion.},
  address   = {Berlin, Germany},
  author    = {Kisselew, Max and Rimell, Laura and Palmer, Alexis and Pad{ó}, Sebastian},
  booktitle = {Proceedings of the ACL SIGMORPHON workshop},
  interhash = {216c950a88f808a231b17caf14f8eff9},
  intrahash = {edd5ccf6dfe9fc4f9b2d64fd1c6dc201},
  pages     = {93--98},
  title     = {Predicting the Direction of Derivation in {E}nglish conversion},
  url       = {https://www.aclweb.org/anthology/W/W16/W16-2015.pdf},
  year      = 2016,
}
@incollection{Klinger2020,
  author    = {Roman Klinger and Evgeny Kim and Sebastian Padó},
  title     = {Emotion Analysis for Literary Studies},
  booktitle = {Reflektierte algorithmische Textanalyse},
  year      = {2020},
  publisher = {De Gruyter},
  address   = {Berlin, Boston},
  doi       = {10.1515/9783110693973-011},
  pages     = {237--268},
  url       = {https://www.degruyter.com/view/book/9783110693973/10.1515/9783110693973-011.xml},
}
@inproceedings{KoeperEtAl:16b,
  author    = {Maximilian Köper and Sabine {Schulte im Walde} and Max Kisselew and Sebastian Padó},
  title     = {Improving Zero-Shot-Learning for {G}erman Particle Verbs by using Training-Space Restrictions and Local Scaling},
  booktitle = {Proceedings of the 5th Joint Conference on Lexical and Computational Semantics (*SEM)},
  year      = {2016},
  pages     = {91--96},
  address   = {Berlin, Germany},
}
@inproceedings{kremer14:_what_subst_tell_us,
  author    = {Gerhard Kremer and Katrin Erk and Sebastian Pad{ó} and Stefan Thater},
  title     = {What Substitutes Tell Us -- {A}nalysis of an ``All-Words'' Lexical Substitution
               Corpus},
  booktitle = {Proceedings of EACL},
  year      = {2014},
  address   = {Gothenburg, Sweden},
  pdf       = {https://www.aclweb.org/anthology/E14-1057},
}
@article{kremer11:_phras_table_suppor_for_human_trans,
  author  = {Gerhard Kremer and Matthias Hartung and Sebastian Pad{ó} and Stefan
             Riezler},
  title   = {Statistical Machine Translation Support Improves Human Adjective
             Translation},
  journal = {Translation: Computation, Corpora, Cognition},
  year    = 2012,
  volume  = 2,
  number  = 1,
  pages   = {103--126},
  pdf     = {http://www.blogs.uni-mainz.de/fb06-tc3/files/2015/11/15-92-3-PB.pdf},
}
@inproceedings{kuhn16:_creta_centr_textan_fach_method_digit_human,
  author    = {Jonas Kuhn and Artemis Alexiadou and Manuel Braun and Thomas Ertl and Sabine Holtz and Cathleen Kantner and Catrin Misselhorn and Sebastian Padó and Sandra Richter and Achim Stein and Claus Zittel},
  title     = {{CRETA (Centrum f{ü}r reflektierte Textanalyse) -- Fach{ü}bergreifende Methodenentwicklung in den Digital Humanities}},
  booktitle = {Digital Humanities im Deutschsprachigen Raum},
  year      = 2016,
  keywords  = {myown abstract},
  address   = {Leipzig, Germany},
}
@article{lapesa20:_analy_polit_debat_newsp_repor,
  author    = {Lapesa, Gabriella and Blessing, Andre and Blokker, Nico and Dayanik, Erenay and Haunss, Sebastian and Kuhn, Jonas and Padó, Sebastian},
  title     = {Analysis of Political Debates through Newspaper Reports: Methods and Outcomes},
  journal   = {Datenbank-Spektrum},
  volume    = {20},
  number    = {2},
  year      = {2020},
  url       = {https://dx.doi.org/10.1007/s13222-020-00344-w},
  keywords  = {article myown},
  abstract  = {Discourse network analysis is an aspiring development in political science which analyzes political debates in terms of bipartite actor/claim networks. It aims at understanding the structure and temporal dynamics of major political debates as instances of politicized democratic decision making. We discuss how such networks can be constructed on the basis of large collections of unstructured text, namely newspaper reports. We sketch a hybrid methodology of manual analysis by domain experts complemented by machine learning and exemplify it on the case study of the German public debate on immigration in the year 2015. The first half of our article sketches the conceptual building blocks of discourse network analysis and demonstrates its application. The second half discusses the potential of the application of NLP methods to support the creation of discourse network datasets.},
  added-at  = {2020-05-29T15:45:59.000+0200},
  timestamp = {2020-06-18T16:40:56.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/24226ed780f206d3d17058c3482f81bf1/sp},
  interhash = {cfd5940a96a17ad172311fe643cff81b},
  intrahash = {4226ed780f206d3d17058c3482f81bf1},
}
@inproceedings{lapesa2020debatenetmig15,
  abstract  = {DEbateNet-migr15 is a manually annotated dataset for German which covers the public debate on immigration in 2015. The building block of our annotation is the political science notion of a claim, i.e., a statement made by a political actor (a politician, a party, or a group of citizens) that a specific action should be taken (e.g., vacant flats should be assigned to refugees). We identify claims in newspaper articles, assign them to actors and fine-grained categories and annotate their polarity and date. The aim of this paper is two-fold: first, we release the full DEbateNet-mig15 corpus and document it by means of a quantitative and qualitative analysis; second, we demonstrate its application in a discourse network analysis framework, which enables us to capture the temporal dynamics of the political debate.},
  added-at  = {2020-02-11T14:44:55.000+0100},
  address   = {Online},
  author    = {Lapesa, Gabriella and Blessing, Andre and Blokker, Nico and Dayanik, Erenay and Haunss, Sebastian and Kuhn, Jonas and Padó, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/23e4f84069e33ea38700b4b9e36f6e61e/sp},
  booktitle = {Proceedings of LREC},
  interhash = {351c134387fd9e594c83bc773b14529e},
  intrahash = {3e4f84069e33ea38700b4b9e36f6e61e},
  keywords  = {conference myown},
  pages     = {919--927},
  timestamp = {2020-12-07T16:42:49.000+0100},
  title     = {{DEbateNet-mig15}: {T}racing the 2015 Immigration Debate in {G}ermany Over Time},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.115},
  year      = 2020,
}
@inproceedings{lapesa17:_type_englis,
  author    = {Gabriella Lapesa and Lea Kawaletz and Marios Andreou and Max Kisselew and Sebastian Padó and Ingo Plag},
  title     = {Type disambiguation of {E}nglish \textit{-ment} derivatives},
  booktitle = {Proceedings of the 11th Mediterranean Morphology Meeting},
  year      = 2017,
  keywords  = {myown abstract},
  address   = {Nicosia, Cyprus},
}
@article{lapesa17:_disam,
  author   = {Gabriella Lapesa and Lea Kawaletz and Ingo Plag and Marios Andreou and Max Kisselew and Sebastian Padó},
  title    = {Disambiguation of newly derived nominalizations in context: {A Distributional Semantics approach}},
  journal  = {Word Structure},
  keywords = {myown},
  url      = {https://doi.org/10.3366/word.2018.0131},
  volume   = {11},
  number   = {3},
  pages    = {315--350},
  year     = 2018,
}
@inproceedings{lapesa16:_charac,
  author    = {Gabriella Lapesa and Max Kisselew and Sebastian Padó and Tillmann Pross and Antje Rossdeutscher},
  title     = {Characterizing the pragmatic component of distributional vectors in terms of polarity: Experiments on {G}erman \textit{{ü}ber} verbs},
  booktitle = {ESSLLI DISSALT Workshop: Distributional Semantics and Semantic Theory},
  year      = 2016,
  keywords  = {myown abstract},
  address   = {Bolzano, Italy},
}
@inproceedings{lapesa17:_are_doggies_reall_nicer_than_dogs,
  author    = {Gabriella Lapesa and Sebastian Padó and Tillmann Pross and Antje Rossdeutscher},
  title     = {Are doggies really nicer than dogs? The impact of morphological derivation on emotional valence in {G}erman},
  booktitle = {Proceedings of IWCS},
  address   = {Montpellier, France},
  year      = {2017},
  url       = {https://www.aclweb.org/anthology/W/W17/W17-6922.pdf},
  keywords  = {conference myown},
}
@article{lehmann23:_clasif_traged_comed_comed_nuevas_calder_barca,
  abstract  = {In this study, we aim at distinguishing comedies and tragedies among 112 dramas written by Calderón de la Barca, using procedures established by distributional semantics. Fifteen of these comedias nuevas have already been classified by qualitative researchers as either tragedies or comedies, respectively; for another 82 dramas the classification was unknown. Four independent document embedding methods are explored, which differ from each other in matrix creation and reduction, and in the calculation of similarity or distance matrices. The best results---measured against the pre-established classification of these dramas---are obtained through the classification procedure that applied the strongest matrix reduction. In addition, a contrastive vocabulary analysis with word embeddings is carried out, based either on word lists produced by the four tested methods, or on the log-likelihood probability distribution for two sub-corpora containing only dramas already determined to be comedies or tragedies. This step permits the identification of 130 terms that are each discriminative either of comedies or of tragedies. The outcome shows that the explored methods identify tragedies with greater accuracy than comedies, indicating that tragedies have more distinctive features. It also becomes apparent that one could more appropriately consider classifications such as tragedy and comedy as poles between which gradual differences can be observed, whereby the ensuing transitional area contains comedias nuevas that have been described in prior research as tragicomedias or comedias mitológicas.},
  added-at  = {2022-10-19T14:39:11.000+0200},
  author    = {Lehmann, J{ö}rg and Pad{ó}, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2388066e31e534b24a99e95d21906dcce/sp},
  doi       = {10.5944/rhd.vol.7.2022.34588},
  interhash = {7350ac10fde56fc7863169e09050a11f},
  intrahash = {388066e31e534b24a99e95d21906dcce},
  journal   = {Revista de Humanidades Digitales},
  keywords  = {myown},
  note      = {Spanish version of Lehmann and Padó (ZfDG 2022) modulo reviewer comments},
  pages     = {80--103},
  timestamp = {2023-08-15T15:18:13.000+0200},
  title     = {Clasificaci{ó}n de Tragedias y Comedias en las Comedias Nuevas de {Calder{ó}n} de la {Barca}},
  url       = {https://revistas.uned.es/index.php/RHD/article/view/34588/27466},
  volume    = 7,
  year      = 2022,
}
@article{lehmann22:_class_calder_barca,
  abstract  = {In this study, we aim at distinguishing comedies and tragedies among 112 dramas written by Calderón de la Barca, using procedures established by distributional semantics. 15 each of these comedias nuevas have already been classified by qualitative researchers as either tragedies or comedies, respectively; for another 82 dramas the classification was unknown. Four independent document embedding methods are explored, which differ from each other in matrix creation and reduction, and in the calculation of similarity or distance matrices. The best results -- measured against the pre-established classification of these dramas -- are obtained through the classification procedure that applied the strongest matrix reduction. In addition, a contrastive vocabulary analysis with word embeddings is carried out, based either on word lists produced by the four tested methods, or on the log-likelihood probability distribution for two sub-corpora containing only dramas already determined to be comedies or tragedies. This step permits the identification of 130 terms that are each discriminative either of comedies or of tragedies. The outcome shows that the explored methods identify tragedies with greater accuracy than comedies, indicating that tragedies show stronger lexical cohesion. It also becomes apparent that one could more appropriately consider classifications such as ›tragedy‹ and ›comedy‹ as poles between which gradual differences can be observed, whereby the ensuing transitional area contains comedias nuevas that have been described in prior research as tragicomedias or comedias mitológicas.},
  added-at  = {2022-10-19T14:38:38.000+0200},
  author    = {Lehmann, J{ö}rg and Pad{ó}, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/230947a4ac33c2a3243a59e2d07088f60/sp},
  interhash = {1e43053d942812068aa84d26f12a11f3},
  intrahash = {30947a4ac33c2a3243a59e2d07088f60},
  journal   = {Zeitschrift für Digitale Geisteswissenschaft},
  keywords  = {article myown},
  note      = {English version of Lehmann and Padó (RHD 2022) modulo reviewer comments},
  timestamp = {2023-05-25T20:10:42.000+0200},
  title     = {Classification of comedies and tragedies written in {Calderón de la Barca's Comedias Nuevas}},
  url       = {https://doi.org/10.17175/2022_012},
  volume    = 7,
  year      = 2022,
}
@inproceedings{lemmerth18:_percep_englis,
  author    = {Natalia Lemmerth and Sebastian Padó},
  title     = {Perception of formality levels by native speakers of {E}nglish},
  booktitle = {8th International Symposium on Intercultural, Cognitive and Social Pragmatics},
  year      = 2018,
  address   = {Sevilla, Spain},
}
@article{lenci22:_editor,
  author   = {Alessandro Lenci and Sebastian Padó},
  title    = {Editorial: Perspectives for Natural Language Processing between {AI}, Linguistics and Cognitive Science},
  keywords = {article myown},
  journal  = {Frontiers in Artificial Intelligence},
  year     = 2022,
  note     = {Editorial for the Frontiers Research Topic},
}
@inproceedings{maab24:_media_bias_detec_acros_famil_languag_model,
  author    = {Iffat Maab and Edison Marrese-Taylor and Sebastian Padó and Yutaka Matsuo},
  title     = {Media Bias Detection Across Families of Language Models},
  booktitle = {Proceedings of NAACL},
  address   = {Mexico City, Mexico},
  year      = {2024},
  url       = {https://aclanthology.org/2024.naacl-long.227/},
}
@inproceedings{tsd2014,
  author    = {Magnini, Bernardo and Dagan, Ido and Neumann, G{ü}nter and Pad{ó},
               Sebastian},
  title     = {Entailment Graphs for Text Analytics in the Excitement Project},
  booktitle = {Proceedings of Text, Speech and Dialogue},
  year      = {2014},
  pages     = {11--18},
  address   = {Brno, Czech Republic},
  doi       = {10.1007/978-3-319-10816-2_2},
  language  = {English},
  url       = {https://dx.doi.org/10.1007/978-3-319-10816-2_2},
}
@inproceedings{maurer24:_toein,
  author    = {Maximilian Maurer and Tanise Ceron and Sebastian Padó and Gabriella Lapesa},
  title     = {Toeing the party line: {E}lection manifestos as a key to understand political discourse on {T}witter},
  booktitle = {Findings of EMNLP},
  address   = {Miami, FL},
  year      = {2024},
  note      = {Accepted for publication},
}
@inproceedings{medic17:_does_free_word_order_hurt,
  author    = {Zoran Medić and Jan Šnajder and Sebastian Padó},
  title     = {Does Free Word Order Hurt? {A}ssessing the Practical Lexical Function Model for {C}roatian},
  booktitle = {Proceedings of STARSEM},
  keywords  = {conference myown},
  year      = 2017,
  address   = {Vancouver, BC},
  note      = {Acceptance rate: 36\%},
  url       = {https://www.aclweb.org/anthology/S17-1014.pdf},
  pages     = {115--120},
}
@inproceedings{melymuka17:_model_deriv_morph_in_ukrain,
  author    = {Mariia Melymuka and Gabriella Lapesa and Max Kisselew and Sebastian Padó},
  title     = {Modeling Derivational Morphology in {U}krainian},
  booktitle = {Proceedings of IWCS},
  address   = {Montpellier, France},
  year      = {2017},
  url       = {https://www.aclweb.org/anthology/W/W17/W17-6928.pdf},
  keywords  = {conference myown},
}
@inproceedings{mirkin10:_asses_role_of_discour_refer,
  author    = {Shachar Mirkin and Ido Dagan and Sebastian Pad{ó}},
  title     = {Assessing the Role of Discourse References in Entailment Inference},
  booktitle = {Proceedings of ACL 2010},
  address   = {Uppsala, Sweden},
  year      = 2010,
}
@inproceedings{moeller23:_attrib_method_siames_encod,
  abstract  = {Despite the success of Siamese encoder models such as sentence
               transformers (ST), little is known about the aspects of inputs they
               pay attention to. A barrier is that their predictions cannot be
               attributed to individual features, as they compare two inputs rather
               than processing a single one.
               This paper derives a local attribution method for Siamese encoders by generalizing
               the principle of integrated gradients to models with multiple inputs.
               The solution takes the form of feature-pair attributions, and can be reduced to a token-token matrix for STs.
               Our method involves the introduction of integrated Jacobians and inherits the advantageous formal properties of integrated gradients: it accounts for the model's full computation graph and is guaranteed to converge to the actual prediction.
               A pilot study shows that in an ST few token-pairs can often explain large fractions of predictions, and it focuses on nouns and verbs.
               For accurate predictions, it however needs to attend to the majority of tokens and parts of speech.
               },
  added-at  = {2023-10-07T22:28:13.000+0200},
  address   = {Singapore},
  author    = {M{ö}ller, Lucas and Nikolaev, Dmitry and Pad{ó}, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/287b7c46571480164f9d4d94074a7467a/sp},
  booktitle = {Proceedings of EMNLP},
  interhash = {3d5cea892bbb78bdf925f5eae5aeb211},
  intrahash = {87b7c46571480164f9d4d94074a7467a},
  keywords  = {conference myown},
  note      = {To appear},
  timestamp = {2023-10-19T15:57:48.000+0200},
  title     = {An Attribution Method for {S}iamese Encoders},
  url       = {https://arxiv.org/pdf/2310.05703.pdf},
  year      = 2023,
}
@inproceedings{moeller22:understanding,
  author    = {Lucas M{ö}ller and Sebastian Padó},
  title     = {Understanding the Relation of User and News Representations in Content-Based Neural News Recommendation},
  booktitle = {Proceedings of the SIGIR Workshop on News Recommendation and Analytics},
  keywords  = {myown workshop},
  year      = {2022}
}
@article{moeller24:explaining-neural,
  author  = {Lucas Möller and Sebastian Padó},
  title   = {Explaining Neural News Recommendation with Attributions onto Reading Histories},
  journal = {ACM Transactions on Intelligent Systems and Technology},
  year    = {2024},
  url     = {https://doi.org/10.1145/3673233},
}
@article{mujcricza-majdt12:_high_precis_senten_align_boots,
  author  = {Éva M{ú}jdricza-Maydt and Huiqin K{ö}rkel-Qu and Stefan Riezler
             and Sebastian Pad{ó}},
  title   = {High-Precision Sentence Alignment by Bootstrapping from Wood Standard
             Annotations},
  journal = {Prague Bulletin of Mathematical Linguistics},
  year    = {2013},
  volume  = {99},
  pages   = {5--16},
}
@inproceedings{nikitina13,
  author    = {Olga Nikitina and Sebastian Pad{ó}},
  title     = {A corpus study of clause combination},
  booktitle = {Proceedings of IWCS},
  address   = {Potsdam, Germany},
  year      = 2013,
}
@inproceedings{nikolaev23:_adver,
  author    = {Nikolaev, Dmitry and Baker, Collin and Petruck, Miriam R. L. and Pad{ó}, Sebastian},
  title     = {Adverbs, surprisingly},
  booktitle = {Proceedings of STARSEM},
  address   = {Toronto, Canada},
  year      = {2023},
  url       = {https://aclanthology.org/2023.starsem-1.44.pdf},
  keywords  = {conference myown},
  abstract  = {This paper begins with the premise that adverbs are neglected in
               computational linguistics. This view derives from two analyses: a
               literature review and a novel adverb dataset to probe a
               state-of-the-art language model, thereby uncovering systematic gaps
               in accounts for adverb meaning. We suggest that using Frame Semantics
               for characterizing word meaning, as in FrameNet, provides a promising
               approach to adverb analysis, given its ability to describe ambiguity,
               semantic roles, and null instantiation.
               },
  added-at  = {2023-05-16T14:56:38.000+0200},
  timestamp = {2023-07-23T16:04:35.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/240281f8254d2c4178f0777f53ce9158b/sp},
  interhash = {3639063158f8d1a513da26f8fadfa0d3},
  intrahash = {40281f8254d2c4178f0777f53ce9158b},
}
@inproceedings{nikolaev23:_multil,
  abstract  = {Scaling analysis is a technique in computational political science that assigns a political actor (e.g. politician or party) a score on a predefined scale based on a (typically long) body of text (e.g. a parliamentary speech or an election manifesto). For example, political scientists have often used the left-right scale to systematically analyse political landscapes of different countries. NLP methods for automatic scaling analysis can find broad application provided they (i) are able to deal with long texts and (ii) work robustly across domains and languages. In this work, we implement and compare two approaches to automatic scaling analysis of political-party manifestos: label aggregation, a pipeline strategy relying on annotations of individual statements from the manifestos, and long-input-Transformer-based models, which compute scaling values directly from raw text. We carry out the analysis of the Comparative Manifestos Project dataset across 41 countries and 27 languages and find that the task can be efficiently solved by state-of-the-art models, with label aggregation producing the best results.},
  added-at  = {2023-10-07T22:27:17.000+0200},
  address   = {Singapore},
  author    = {Nikolaev, Dmitry and Ceron, Tanise and Pad{ó}, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/26b0a2b73c4af8c94ae6add12514d94cc/sp},
  booktitle = {Proceedings of EMNLP},
  interhash = {a4cc641921c9aa1101655d4b42d3a9d2},
  intrahash = {6b0a2b73c4af8c94ae6add12514d94cc},
  keywords  = {conference myown},
  note      = {To appear},
  timestamp = {2023-10-20T10:36:18.000+0200},
  title     = {Multilingual estimation of political-party positioning: From label aggregation to long-input {T}ransformers},
  url       = {https://arxiv.org/abs/2310.12575},
  year      = 2023,
}
@inproceedings{nikolaev-pado-2022-word,
  title     = {Word-order Typology in Multilingual {BERT}: A Case Study in Subordinate-Clause Detection},
  author    = {Nikolaev, Dmitry and Pad{ó}, Sebastian},
  booktitle = {Proceedings of the 4th Workshop on Research in Computational Linguistic Typology and Multilingual NLP},
  month     = jul,
  year      = {2022},
  address   = {Seattle, Washington},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.sigtyp-1.2},
  doi       = {10.18653/v1/2022.sigtyp-1.2},
  pages     = {11--21},
  abstract  = {The capabilities and limitations of BERT and similar models are still unclear when it comes to learning syntactic abstractions, in particular across languages. In this paper, we use the task of subordinate-clause detection within and across languages to probe these properties. We show that this task is deceptively simple, with easy gains offset by a long tail of harder cases, and that BERT{'}s zero-shot performance is dominated by word-order effects, mirroring the SVO/VSO/SOV typology.},
}
@inproceedings{nikolaev23:_argadj,
  author    = {Nikolaev, Dmitry and Pad{ó}, Sebastian},
  title     = {The argument-adjunct distinction in {BERT}: A {FrameNet}-based investigation},
  booktitle = {Proceedings of IWCS},
  address   = {Nancy, France},
  year      = {2023},
  url       = {https://iwcs.pimoid.fr/2.pdf},
  keywords  = {conference myown},
  abstract  = {The distinction between arguments and adjuncts is a fundamental assumption
               of several linguistic theories. In this study, we investigate to what
               extent this distinction is picked up by a Transformer-based language
               model. We use BERT as a case study, operationalizing arguments and
               adjuncts as core and non-core FrameNet frame elements, respectively, and
               tying them to activations of particular BERT neurons. We present
               evidence, from English and Korean, that BERT learns more dedicated
               representations for arguments than for adjuncts when fine-tuned on the
               FrameNet frame-identification task. We also show that this distinction
               is already present in a weaker form in the vanilla pre-trained model.},
  added-at  = {2023-04-26T18:18:31.000+0200},
  timestamp = {2023-07-04T19:35:55.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2a999feae16c6e240857b18253302a552/sp},
  interhash = {e17b9da7bc7833b7ca6de99aab4a5e32},
  intrahash = {a999feae16c6e240857b18253302a552},
}
% BlackboxNLP 2023 workshop paper on linear structural probing of sentence embeddings.
% "Transformer" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{nikolaev23:_inves_trans,
  author    = {Nikolaev, Dmitry and Pad{ó}, Sebastian},
  title     = {Investigating semantic subspaces of {Transformer} sentence embeddings through linear structural probing},
  booktitle = {Proceedings of the BlackboxNLP workshop},
  year      = 2023,
  address   = {Singapore},
  url       = {https://arxiv.org/abs/2310.11923},
  note      = {To appear},
  keywords  = {myown workshop},
  abstract  = { The question of what kinds of linguistic information are encoded in different layers
of Transformer-based language models is of considerable interest for the NLP community.
Existing work, however, has overwhelmingly focused on word-level representations and
encoder-only language models with the masked-token training objective.
In this paper, we present experiments with semantic structural probing,
a method for studying sentence-level representations
via finding a subspace of the embedding space that provides
suitable task-specific pairwise distances between data-points.
We apply our method to language models from different families (encoder-only, decoder-only,
encoder-decoder) and of different sizes in the context of two tasks, semantic textual similarity
and natural-language inference. We find that model families differ substantially in their
performance and layer dynamics, but that the results are largely model-size invariant.},
  added-at  = {2023-10-08T21:21:47.000+0200},
  timestamp = {2023-10-19T15:59:55.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2412d5acd02f1f1261544a2208b8b24eb/sp},
  interhash = {fbe02e9b3012e988b0767116af72521d},
  intrahash = {412d5acd02f1f1261544a2208b8b24eb}
}
% EACL 2023 paper on representation biases in sentence transformers.
@inproceedings{nikolaev23:_repres,
  author    = {Nikolaev, Dmitry and Pad{ó}, Sebastian},
  title     = {Representation biases in sentence transformers},
  booktitle = {Proceedings of EACL},
  year      = 2023,
  address   = {Dubrovnik, Croatia},
  url       = {https://aclanthology.org/2023.eacl-main.268},
  keywords  = {conference myown},
  abstract  = {Variants of the BERT architecture specialised for producing full-sentence representations often achieve better performance on downstream tasks than sentence embeddings extracted from vanilla BERT. However, there is still little understanding of what properties of inputs determine the properties of such representations. In this study, we construct several sets of sentences with pre-defined lexical and syntactic structures and show that SOTA sentence transformers have a strong nominal-participant-set bias: cosine similarities between pairs of sentences are more strongly determined by the overlap in the set of their noun participants than by having the same predicates, lengthy nominal modifiers, or adjuncts. At the same time, the precise syntactic-thematic functions of the participants are largely irrelevant.},
  added-at  = {2023-01-23T10:54:40.000+0100},
  timestamp = {2023-05-02T15:21:40.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2c0170446a363bca46ae6b2da016b151d/sp},
  interhash = {2dd0ba5490c2846139e7654557e7eb14},
  intrahash = {c0170446a363bca46ae6b2da016b151d}
}
% IWCS 2023 paper analysing BERT sentence representations via principal components.
@inproceedings{nikolaev23:_universe,
  author    = {Nikolaev, Dmitry and Pad{ó}, Sebastian},
  title     = {The Universe of Utterances According to {BERT}},
  booktitle = {Proceedings of IWCS},
  year      = 2023,
  address   = {Nancy, France},
  url       = {https://iwcs.pimoid.fr/60.pdf},
  keywords  = {conference myown},
  abstract  = {It has been argued that BERT ``rediscovers the traditional NLP
pipeline'', with lower layers extracting morphosyntactic features and
higher layers creating holistic sentence-level representations.
In this paper, we critically examine this assumption through a
principal-component-guided analysis, extracting sets of inputs that
correspond to specific activation patterns in BERT sentence representations.
We find that even in higher layers, the model mostly picks up on a
variegated bunch of low-level features, many related to sentence
complexity, that presumably arise from its specific pre-training
objectives.},
  added-at  = {2023-04-26T18:18:19.000+0200},
  timestamp = {2023-07-04T19:36:51.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2995882df8792875cb3a67bbea378a90a/sp},
  interhash = {9dabc394be5ffe8d8b9818192791f99a},
  intrahash = {995882df8792875cb3a67bbea378a90a}
}
% 2013 workshop paper on using UIMA to structure a textual-entailment platform.
% The DBLP-specific "ee" field is stored as "url" so that styles can print the link.
@inproceedings{dblp:conf/gldv/nohp13,
  author    = {Tae-Gil Noh and Sebastian Pad{ó}},
  title     = {Using {UIMA} to Structure An Open Platform for Textual Entailment},
  booktitle = {Proceedings of the 3rd Workshop on Unstructured Information Management
Architecture},
  year      = {2013},
  pages     = {26--33},
  address   = {Darmstadt, Germany},
  url       = {http://ceur-ws.org/Vol-1038/paper_3.pdf},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}
% KONVENS 2014 paper on a discourse-context-sensitive language model.
@inproceedings{noh14:_languag_model_sensit_discour_contex,
  author    = {Tae-Gil Noh and Sebastian Pad{ó}},
  title     = {A Language Model Sensitive to Discourse Context},
  booktitle = {Proceedings of KONVENS},
  year      = {2014},
  pages     = {201--206},
  address   = {Hildesheim, Germany},
  url       = {http://opus.bsz-bw.de/ubhi/volltexte/2014/282/}
}
% STARSEM 2015 paper on multi-level alignments for textual entailment.
@inproceedings{noh15:_multi_level_align_as_exten,
  author    = {Tae-Gil Noh and Sebastian Pad{ó} and Vered Shwartz and Ido Dagan
and Vivi Nastase and Kathrin Eichler and Lili Kotlerman},
  title     = {Multi-Level Alignments As An Extensible Representation Basis for
Textual Entailment Algorithms},
  booktitle = {Proceedings of STARSEM},
  year      = {2015},
  pages     = {193--198},
  address   = {Denver, CO},
  url       = {https://www.aclweb.org/anthology/S15-1022}
}
% Edited proceedings volume (EMNLP 2011 workshop on geometrical models of semantics).
% Editor given names are stored in full; the bibliography style abbreviates if needed.
@proceedings{pado11:_proceed_of_emnlp_works_geomet,
  editor  = {Sebastian Pad{ó} and Yves Peirsman},
  title   = {Proceedings of the {EMNLP} Workshop on Geometrical Models of Natural
Language Semantics},
  year    = {2011},
  address = {Edinburgh, UK}
}
% Edited proceedings volume (EMNLP 2011 TextInfer workshop).
% Editor given names are stored in full; the bibliography style abbreviates if needed.
@proceedings{pado11:_proceed_of_emnlp_textin_works_textual_entail,
  editor  = {Sebastian Pad{ó} and Stefan Thater},
  title   = {Proceedings of the {EMNLP} {TextInfer} Workshop on Textual Entailment},
  year    = {2011},
  address = {Edinburgh, UK}
}
% Book in a dissertation series (2007) on cross-lingual annotation projection.
@book{sebastianpado07:_cross_lingual_annot_projec_model,
  author    = {Sebastian Pad{ó}},
  title     = {Cross-Lingual Annotation Projection Models for Role-Semantic Information},
  series    = {Saarbr{ü}cken Dissertations in Computational Linguistics and Language
Technology},
  volume    = {21},
  publisher = {German Research Center for Artificial Intelligence and Saarland University},
  year      = {2007}
}
% NODALIDA 2007 workshop paper on cross-lingual parallelism of FrameNet frames.
@inproceedings{pado07:_translational,
  author    = {Sebastian Pad{ó}},
  title     = {Translational Equivalence and Cross-lingual Parallelism: The Case
of {FrameNet} Frames},
  booktitle = {Proceedings of the NODALIDA Workshop on Building Frame Semantics
Resources for Scandinavian and Baltic Languages},
  year      = {2007},
  address   = {Tartu, Estonia}
}
% ACL 2019 paper on constructing discourse networks for political debates.
@inproceedings{pado19:_who_sides_with_whom,
  author    = {Sebastian Padó and Andre Blessing and Nico Blokker and Erenay Dayanik and Sebastian Haunss and Jonas Kuhn},
  title     = {Who Sides With Whom? Towards Computational Construction of Discourse Networks for Political Debates},
  booktitle = {Proceedings of ACL},
  year      = 2019,
  address   = {Florence, Italy},
  keywords  = {conference myown}
}
% Journal article (Machine Translation, 2009) on an entailment-based MT metric.
@article{pado:_seman_mt_based_entail_featur,
  author  = {Sebastian Pad{ó} and Daniel Cer and Michel Galley and Christopher
D. Manning and Daniel Jurafsky},
  title   = {Measuring Machine Translation Quality as Semantic Equivalence: A
Metric based on Entailment Features},
  journal = {Machine Translation},
  year    = {2009},
  volume  = {23},
  number  = {2--3},
  pages   = {181--193}
}
% Handbook chapter on textual entailment.
@incollection{pado10:_textual_entail,
  author    = {Sebastian Pad{ó} and Ido Dagan},
  title     = {Textual Entailment},
  booktitle = {Oxford Handbook of Natural Language Processing},
  editor    = {Ruslan Mitkov},
  publisher = {Oxford University Press},
  year      = {2017}
}
% 2005 workshop paper on cross-lingual semantic matching for paraphrase modelling.
@inproceedings{pado05:_cause,
  author    = {Sebastian Pad{ó} and Katrin Erk},
  title     = {To cause or not to cause: Cross-lingual semantic matching for paraphrase
modelling},
  booktitle = {Proceedings of the Cross-Language Knowledge Induction Workshop},
  year      = {2005},
  address   = {Cluj-Napoca, Romania}
}
% 2008 NIST MetricsMATR workshop paper on entailment technology for MT evaluation.
@inproceedings{pado08:entailment_for_mt,
  author    = {Sebastian Pad{ó} and Michel Galley and Dan Jurafsky and Christopher
Manning},
  title     = {Using entailment technology for {MT} evaluation},
  booktitle = {Proceedings of the NIST MetricsMATR workshop},
  year      = {2008},
  address   = {Honolulu, HI}
}
% ACL 2009 paper on MT evaluation with entailment features.
@inproceedings{pado09:_robus_machin_trans_evaluat_with_entail_featur,
  author    = {Sebastian Pad{ó} and Michel Galley and Christopher D. Manning and
Daniel Jurafsky},
  title     = {Robust Machine Translation Evaluation with Entailment Features},
  booktitle = {Proceedings of ACL 2009},
  year      = {2009},
  address   = {Singapore}
}
% EACL 2009 MT workshop paper on textual entailment features for MT evaluation.
@inproceedings{pado09:_textual_entail_featur_for_machin_trans_evaluat,
  author    = {Sebastian Pad{ó} and Michel Galley and Christopher D. Manning and
Daniel Jurafsky},
  title     = {Textual Entailment Features for Machine Translation Evaluation},
  booktitle = {Proceedings of the EACL Workshop on Machine Translation},
  year      = {2009},
  address   = {Athens, Greece}
}
% COLING 2016 paper on distributional predictability of derivational word formation.
% The surname is written {\v{S}}najder: a bare "v" before it would be parsed as a von-particle.
@inproceedings{pado16:_predic_distr_seman_deriv_word_format,
  author    = {Pad{ó}, Sebastian and Herbelot, Aurélie and Kisselew, Max and {\v{S}}najder, Jan},
  title     = {Predictability of Distributional Semantics in Derivational Word Formation},
  booktitle = {Proceedings of COLING},
  year      = 2016,
  pages     = {1285--1296},
  address   = {Osaka, Japan},
  url       = {https://www.aclweb.org/anthology/C/C16/C16-1122.pdf},
  abstract  = {Compositional distributional semantic models (CDSMs) have successfully been applied to the task of predicting the meaning of a range of linguistic constructions. Their performance on semi-compositional word formation process of (morphological) derivation, however, has been extremely variable, with no large-scale empirical investigation to date. This paper fills that gap, performing an analysis of CDSM predictions on a large dataset (over 30,000 German derivationally related word pairs). We use linear regression models to analyze CDSM performance and obtain insights into the linguistic factors that influence how predictable the distributional context of a derived word is going to be. We identify various such factors, notably part of speech, argument structure, and semantic regularity.},
  interhash = {9ac7422d8b31a782adca4c6cfc8fa914},
  intrahash = {55b824dc6b30f97d8dd8bbe9717ba615}
}
% Paper in the Tbilisi Symposium proceedings (LNCS 13206) on polysemous function words.
@inproceedings{pado21:_distr_analy_funct_words,
  author    = {Sebastian Padó and Daniel Hole},
  title     = {Distributional Analysis of Polysemous Function Words},
  booktitle = {Proceedings of the 13th International Tbilisi
Symposium on Language, Logic and Computation 2019},
  series    = {Lecture Notes in Computer Science},
  volume    = {13206},
  publisher = {Springer},
  year      = 2022,
  address   = {Batumi, Georgia},
  url       = {https://doi.org/10.1007/978-3-030-98479-3_6},
  keywords  = {conference myown}
}
% AAAI 2005 paper on cross-lingual bootstrapping of semantic lexicons.
@inproceedings{pado05:_cross_boots_seman_lexic,
  author    = {Sebastian Pad{ó} and Mirella Lapata},
  title     = {Cross-lingual Bootstrapping for Semantic Lexicons: The case of {FrameNet}},
  booktitle = {Proceedings of AAAI-05},
  year      = {2005},
  pages     = {1087--1092},
  address   = {Pittsburgh, PA}
}
% HLT/EMNLP 2005 paper on cross-lingual projection of role-semantic information.
@inproceedings{pado05:_cross,
  author    = {Sebastian Pad{ó} and Mirella Lapata},
  title     = {Cross-lingual projection of role-semantic information},
  booktitle = {Proceedings of HLT/EMNLP 2005},
  year      = {2005},
  address   = {Vancouver, BC}
}
% ACL-COLING 2006 paper on constituent alignment with edge covers.
@inproceedings{pado06:_optim_const_align_edge_cover_seman_projec,
  author    = {Sebastian Pad{ó} and Mirella Lapata},
  title     = {Optimal Constituent Alignment with Edge Covers for Semantic Projection},
  booktitle = {Proceedings of ACL-COLING 2006},
  year      = {2006},
  pages     = {1161--1168},
  address   = {Sydney, Australia}
}
% Journal article (Computational Linguistics, 2007) on dependency-based semantic spaces.
@article{pado07:_depen,
  author  = {Sebastian Pad{ó} and Mirella Lapata},
  title   = {Dependency-based construction of semantic space models},
  journal = {Computational Linguistics},
  year    = {2007},
  volume  = {33},
  number  = {2},
  pages   = {161--199}
}
% Journal article (2009) on cross-lingual annotation projection of semantic roles.
@article{pado:_cross_projec_role_infor,
  author  = {Sebastian Pad{ó} and Mirella Lapata},
  title   = {Cross-lingual Annotation Projection of Role-semantic Information},
  journal = {Journal of Artificial Intelligence Research},
  year    = {2009},
  volume  = {36},
  pages   = {307--340}
}
% Journal article (2015) on a modular architecture for textual entailment.
@article{pado13:_desig_realiz_modul_archit_textual_entail,
  author  = {Sebastian Pad{ó} and Tae-Gil Noh and Asher Stern and Rui Wang and
Roberto Zanoli},
  title   = {Design and Realization of a Modular Architecture for Textual Entailment},
  journal = {Natural Language Engineering},
  year    = {2015},
  volume  = {21},
  number  = {2},
  pages   = {167--200}
}
% EMNLP/CoNLL 2007 paper on corpus-based modelling of plausibility judgements.
@inproceedings{pado07_flexible,
  author    = {Sebastian Pad{ó} and Ulrike Pad{ó} and Katrin Erk},
  title     = {Flexible, Corpus-Based Modelling of Human Plausibility Judgements},
  booktitle = {Proceedings of EMNLP/CoNLL 2007},
  year      = {2007},
  address   = {Prague, Czech Republic}
}
% IWCS 2015 workshop paper on semantic content and asymmetry in derivation.
% The surname is written {\v{S}}najder: a bare "v" before it would be parsed as a von-particle.
@inproceedings{pado15:_measur_seman_conten_to_asses_asymm_deriv,
  author    = {Sebastian Pad{ó} and Alexis Palmer and Max Kisselew and Jan {\v{S}}najder},
  title     = {Measuring Semantic Content To Assess Asymmetry in Derivation},
  booktitle = {Proceedings of the IWCS Workshop on Advances in Distributional Semantics},
  year      = {2015},
  address   = {London, UK}
}
% COLING 2008 paper on semantic role assignment for event nominalisations.
@inproceedings{pado08:_nom,
  author    = {Sebastian Pad{ó} and Marco Pennacchiotti and Caroline Sporleder},
  title     = {Semantic role assignment for event nominalisations by leveraging
verbal data},
  booktitle = {Proceedings of COLING 2008},
  year      = {2008},
  pages     = {665--672},
  address   = {Manchester, UK}
}
% TALN 2007 paper (French title) on role-semantic annotation by cross-lingual projection.
@inproceedings{pado07:_annotation,
  author    = {Sebastian Pad{ó} and Guillaume Pitel},
  title     = {Annotation pr{é}cise du fran{\c{c}}ais en s{é}mantique de r{ô}les
par projection cross-linguistique},
  booktitle = {Proceedings of TALN-07},
  year      = {2007},
  address   = {Toulouse, France}
}
% KONVENS 2016 paper on smoothing syntax-based semantic spaces.
% The percent sign in "note" is escaped and the field closed, so "pages" is a separate
% field and the entry's braces balance.
@inproceedings{pado16:_smoot_syntax_based_seman_spaces,
  author    = {Pad{ó}, Sebastian and {\v{S}}najder, Jan and Utt, Jason and Zeller, Britta},
  title     = {Smoothing Syntax-Based Semantic Spaces: Let The Winner Take It All},
  booktitle = {Proceedings of KONVENS},
  year      = 2016,
  pages     = {186--191},
  address   = {Bochum, Germany},
  url       = {http://www.linguistics.rub.de/konvens16/pub/23_konvensproc.pdf},
  note      = {Acceptance rate: 65\%},
  abstract  = {Syntax-based semantic spaces are more flexible and can potentially better model semantic relatedness than bag-of-words spaces. Their application is however limited by sparsity and restricted coverage. We address these problems by smoothing syntax-based with word-based spaces and investigate when to choose which prediction. We obtain the best results by picking the maximal predicted similarity for each word pair, taking advantage of the tendency of unreliable models to underestimate similarity. We show that smoothing can substantially improve coverage while maintaining prediction quality on two German benchmark tasks.},
  interhash = {62883c84d9b9c7f796a52f7f3208f399},
  intrahash = {1997bb1b688523ed3a869b5aa5464004}
}
% ACL 2013 paper on derivational smoothing for distributional semantics.
% The surname is written {\v{S}}najder: a bare "v" before it would be parsed as a von-particle.
@inproceedings{pado:2013,
  author        = {Pad{ó}, Sebastian and {\v{S}}najder, Jan and Zeller, Britta},
  title         = {Derivational Smoothing for Syntactic Distributional Semantics},
  booktitle     = {Proceedings of ACL},
  year          = {2013},
  address       = {Sofia, Bulgaria},
  date-added    = {2013-05-14 17:37:08 +0000},
  date-modified = {2013-05-14 17:38:17 +0000}
}
% NetWords 2015 paper on morphological priming in German.
% The surname is written {\v{S}}najder: a bare "v" before it would be parsed as a von-particle.
@inproceedings{pado15:_morph_primin_german,
  author    = {Sebastian Pad{ó} and Britta Zeller and Jan {\v{S}}najder},
  title     = {Morphological Priming in {German}: The Word is Not Enough (Or Is It?)},
  booktitle = {Proceedings of NetWords},
  year      = {2015},
  pages     = {42--45},
  address   = {Pisa, Italy}
}
% Journal article (Language Resources and Evaluation, 2021) on grader agreement
% in short-answer corpora. The abstract's quotation marks and fi/fl/ff ligatures
% are restored from mis-encoded bytes.
@article{pado:_deter_grader_agreem,
  author    = {Padó, Ulrike and Padó, Sebastian},
  title     = {Determinants of Grader Agreement: An Analysis of Multiple Short Answer Corpora},
  journal   = {Language Resources and Evaluation},
  year      = 2021,
  url       = {https://doi.org/10.1007/s10579-021-09547-3},
  keywords  = {article myown},
  abstract  = {The ``short answer'' question format is a widely used tool in educational assessment, in which students write one to three sentences in response to an open question. The answers are subsequently rated by expert graders. The agreement between these graders is crucial for reliable analysis, both in terms of educational strategies and in terms of developing automatic models for short answer grading (SAG), an active research topic in NLP.
This makes it important to understand the properties that influence grader agreement (such as question difficulty, answer length, and answer correctness). However, the twin challenges towards such an understanding are the wide range of SAG corpora in use (which differ along a number of dimensions) and the hierarchical structure of potentially relevant properties (which can be located at the corpus, answer, or question levels).
This article uses generalized mixed effects models to analyze the effect of various such properties on grader agreement in six major SAG corpora for two main assessment tasks (language and content assessment). Overall, we find broad agreement among corpora, with a number of properties behaving similarly across corpora (e.g., shorter answers and correct answers are easier to grade). Some properties show more corpus-specific behavior (e.g., the question difficulty level), and some corpora are more in line with general tendencies than others. In sum, we obtain a nuanced picture of how the major short answer grading corpora are similar and dissimilar from which we derive suggestions for corpus development and analysis.},
  added-at  = {2021-05-24T21:53:40.000+0200},
  timestamp = {2021-07-12T21:07:53.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/200dc9929fd6c75887fa0cc945c60adfc/sp},
  interhash = {859825dbda00ded0351b28f5078e7712},
  intrahash = {00dc9929fd6c75887fa0cc945c60adfc}
}
% EMNLP 2020 paper on performance prediction for span identification tasks.
@inproceedings{Papay2020,
  author    = {Sean Papay and Roman Klinger and Sebastian Padó},
  title     = {Dissecting Span Identification Tasks with Performance Prediction},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  year      = {2020},
}
% ICLR 2022 paper on constraining linear-chain CRFs to regular languages.
% An entry may carry only one "url" field; the arXiv identifier is kept via the eprint fields.
@inproceedings{Papay2022,
  author     = {Sean Papay and Roman Klinger and Sebastian Pad{ó}},
  title      = {Constraining Linear-chain {CRF}s to Regular Languages},
  booktitle  = {International Conference on Learning Representations},
  year       = {2022},
  url        = {https://openreview.net/forum?id=jbrgwbv8nD},
  eprint     = {2106.07306},
  eprinttype = {arXiv},
}
% DeriMo 2017 workshop paper on graph-theoretical evaluation of a derivational lexicon.
@inproceedings{papay17:_evaluat_and_improv_deriv_lexic,
  author    = {Sean Papay and Gabriella Lapesa and Sebastian Padó},
  title     = {Evaluating and Improving a Derivational Lexicon with Graph-theoretical Methods},
  booktitle = {Proceedings of the DeriMo workshop},
  year      = 2017,
  address   = {Milan, Italy},
  keywords  = {workshop myown}
}
% RANLP 2019 paper on corpus-agnostic quotation detection and classification.
@inproceedings{papay19:_quotat_detec_class_corpus_agnos_model,
  author    = {Papay, Sean and Pad{ó}, Sebastian},
  title     = {Quotation Detection and Classification with a Corpus-Agnostic Model},
  booktitle = {Proceedings of RANLP},
  year      = 2019,
  address   = {Varna, Bulgaria},
  keywords  = {sys:relevantfor:tcl-ims conference myown},
  timestamp = {2019-10-17T17:40:24.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/270888303d780edb3326ead4b4f61524d/sp},
  interhash = {81033b4e54f7f57149e83528bd55aebe},
  intrahash = {70888303d780edb3326ead4b4f61524d}
}
% LREC 2020 paper introducing the RiQuA quotation corpus.
@inproceedings{papay2020riqua,
  author    = {Papay, Sean and Padó, Sebastian},
  title     = {{RiQuA}: A Corpus of Rich Quotation Annotation for English Literary Text},
  booktitle = {Proceedings of LREC},
  year      = 2020,
  pages     = {835--841},
  address   = {Online},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.104},
  keywords  = {conference myown},
  abstract  = {We introduce RiQuA (RIch QUotation Annotations), a corpus that provides quotations, including their interpersonal structure (speakers and addressees) for English literary text. The corpus comprises 11 works of 19th-century literature that were manually doubly annotated for direct and indirect quotations. For each quotation, its span, speaker, addressee, and cue are identified (if present). This provides a rich view of dialogue structures not available from other available corpora. We detail the process of creating this dataset, discuss the annotation guidelines, and analyze the resulting corpus in terms of inter-annotator agreement and its properties. RiQuA, along with its annotations guidelines and associated scripts, are publicly available for use, modification, and experimentation.},
  added-at  = {2020-02-11T14:44:55.000+0100},
  timestamp = {2020-12-07T16:42:29.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2a9bd0260c8935a0af9ed87270e0caf05/sp},
  interhash = {0f266b20bfd4e958e7f946dbc1cea8ea},
  intrahash = {a9bd0260c8935a0af9ed87270e0caf05}
}
% NAACL 2018 workshop paper on character-aware embeddings in low-resource settings.
% The abstract's dashes are restored from mis-encoded bytes.
@inproceedings{papay18:_addres_low_resour_scenar_charac_embed,
  author    = {Sean Papay and Sebastian Padó and {Ngoc Thang} Vu},
  title     = {Addressing Low-Resource Scenarios with Character-aware Embeddings},
  booktitle = {Proceedings of the NAACL Workshop on Subword and Character Level Models},
  year      = 2018,
  address   = {New Orleans, LA},
  url       = {https://aclweb.org/anthology/W18-1204.pdf},
  keywords  = {workshop myown},
  abstract  = {Most modern approaches to computing word embeddings assume the availability of text corpora with billions of words. In this paper, we explore a setup where only corpora with millions of words are available, and many words in any new text are out of vocabulary. This setup is both of practical interest --- modeling the situation for specific domains and low-resource languages --- and of psycholinguistic interest, since it corresponds much more closely to the actual experiences and challenges of human language learning and use. We evaluate skip-gram word embeddings and two types of character-based embeddings on word relatedness prediction. On large corpora, performance of both model types is equal for frequent words, but character awareness already helps for infrequent words. Consistently, on small corpora, the character-based models perform overall better than skip-grams. The concatenation of different embeddings performs best on small corpora and robustly on large corpora.}
}
% LREC-COLING 2024 paper on multi-dimensional MT evaluation for Korean.
% The language name is brace-protected so sentence-casing styles keep its capital.
@inproceedings{park24:_multi_dimen_machin_trans_evaluat,
  author    = {Park, Dojun and Padó, Sebastian},
  title     = {Multi-Dimensional Machine Translation Evaluation: Model Evaluation and Resource for {Korean}},
  booktitle = {Proceedings of LREC-COLING},
  year      = 2024,
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.1024.pdf},
  keywords  = {conference myown},
  added-at  = {2024-02-20T11:29:14.000+0100},
  timestamp = {2024-05-27T17:29:55.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/27aa477beca1e879b5c9e4285e0c4fa28/sp},
  interhash = {971e0905c7aef16226ed15f102469abd},
  intrahash = {7aa477beca1e879b5c9e4285e0c4fa28}
}
% NAACL 2010 paper on cross-lingual induction of selectional preferences.
@inproceedings{peirsman10:_cross_lingual_induc_of_selec,
  author    = {Yves Peirsman and Sebastian Pad{ó}},
  title     = {Cross-lingual Induction of Selectional Preferences with Bilingual
Vector Spaces},
  booktitle = {Proceedings of NAACL 2010},
  year      = {2010},
  address   = {Los Angeles, CA}
}
% Journal article (2011) on semantic relations in bilingual vector spaces.
@article{peirsman:_seman_relat_in_bilin_vector_spaces,
  author  = {Yves Peirsman and Sebastian Pad{ó}},
  title   = {Semantic relations in bilingual vector spaces},
  journal = {ACM Transactions on Speech and Language Processing},
  year    = {2011},
  volume  = {8},
  number  = {2},
  pages   = {3:1--3:21}
}
% FIRE 2021 working-notes paper on multi-task learning for hate speech detection.
@inproceedings{Plazadelarco2021,
  author    = {Flor M. {Plaza-del-Arco} and Sercan Halat and
Sebastian Pad{ó} and Roman Klinger},
  title     = {Multi-Task Learning with Sentiment, Emotion, and
Target Detection to Recognize Hate Speech and
Offensive Language},
  booktitle = {FIRE 2021 Working Notes},
  year      = 2021,
  pages     = {297--318},
  url       = {http://ceur-ws.org/Vol-3159/T1-30.pdf},
}
% Amsterdam Colloquium 2017 paper on lexical-conceptual and distributional semantics.
% NOTE(review): entry Pross2017b has the same title, venue and year; possibly a
% duplicate of this one (author lists differ), so confirm before merging.
@inproceedings{pross17:_integ,
  author    = {Tillmann Pross and Antje Rossdeutscher and Gabriella Lapesa and Sebastian Padó},
  title     = {Integrating lexical-conceptual and distributional semantics: a case report},
  booktitle = {Proceedings of the Amsterdam Colloquium},
  year      = 2017,
  address   = {Amsterdam, The Netherlands}
}
% ESSLLI 2016 workshop contribution on German prefix verbs.
@inproceedings{pross16:_over,
  author    = {Tillmann Pross and Antje Rossdeutscher and Sebastian Padó and Gabriella Lapesa and Max Kisselew},
  title     = {'Over reference': A comparative study on {G}erman prefix verbs},
  booktitle = {ESSLLI SemRefPlus Workshop: Referential semantics one
step further: Incorporating insights from conceptual
and distributional approaches to meaning},
  year      = 2016,
  address   = {Bolzano, Italy},
  keywords  = {myown abstract}
}
% Amsterdam Colloquium 2017 paper on lexical-conceptual and distributional semantics.
% NOTE(review): entry pross17:_integ has the same title, venue and year; possibly a
% duplicate of this one (author lists differ), so confirm before merging.
@inproceedings{Pross2017b,
  author    = {Pross, Tillmann and Ro{ß}deutscher, Antje and Padó, Sebastian and Lapesa, Gabriella and Kisselew, Max},
  title     = {Integrating lexical-conceptual and distributional semantics: a case report},
  booktitle = {Proceedings of the Amsterdam Colloquium 2017},
  year      = {2017},
  pages     = {75--85},
  owner     = {tillmannpross},
  timestamp = {2017.11.08},
}
% ACL 2019 Widening NLP workshop contribution on trilingual dictionaries for Urdu.
@inproceedings{rauf19:_learn_trilin_diction_urdu,
  author    = {Moiz Rauf and Sebastian Padó},
  title     = {Learning Trilingual Dictionaries for {U}rdu -- {R}oman {U}rdu -- {E}nglish},
  booktitle = {Proceedings of the ACL Workshop on Widening NLP},
  year      = 2019,
  address   = {Florence, Italy},
  keywords  = {myown abstract}
}
% NAACL 2019 LaTeCH-CLfL workshop paper on article identification in historical newspapers.
@inproceedings{riedl19:_clust_based_artic_ident_histor_newsp,
  author    = {Martin Riedl and Daniela Betz and Sebastian Padó},
  title     = {Clustering-Based Article Identification in Historical Newspapers},
  booktitle = {Proceedings of the NAACL LaTeCH-CLfL workshop},
  year      = 2019,
  address   = {Minneapolis, MN},
  url       = {https://aclweb.org/anthology/papers/W/W19/W19-2502/},
  keywords  = {workshop myown}
}
% ACL 2018 short paper comparing models for German named entity recognition.
@inproceedings{riedl-pado-2018-named,
  author    = {Riedl, Martin and
Pad{ó}, Sebastian},
  title     = {A Named Entity Recognition Shootout for {G}erman},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2018},
  pages     = {120--125},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/P18-2020},
  abstract  = {We ask how to practically build a model for German named entity recognition (NER) that performs at the state of the art for both contemporary and historical texts, i.e., a big-data and a small-data scenario. The two best-performing model families are pitted against each other (linear-chain CRFs and BiLSTM) to observe the trade-off between expressiveness and data requirements. BiLSTM outperforms the CRF when large datasets are available and performs inferior for the smallest dataset. BiLSTMs profit substantially from transfer learning, which enables them to be trained on multiple corpora, resulting in a new state-of-the-art model for German NER on two contemporary German corpora (CoNLL 2003 and GermEval 2014) and two historic corpora.},
}
% ACL 2016 paper on model architectures for quotation detection.
% The percent sign in "note" is escaped and the field closed, so "pages" is a separate
% field and the entry's braces balance.
@inproceedings{scheible16:_model_archit_quotat_detec,
  author    = {Scheible, Christian and Klinger, Roman and Pad{ó}, Sebastian},
  title     = {Model Architectures for Quotation Detection},
  booktitle = {Proceedings of ACL},
  year      = 2016,
  pages     = {1736--1745},
  address   = {Berlin, Germany},
  url       = {https://www.aclweb.org/anthology/P/P16/P16-1164.pdf},
  note      = {Acceptance rate: 25\%},
  abstract  = {Quotation detection is the task of locating spans of quoted speech in text. The state of the art treats this problem as a sequence labeling task and employs linear-chain conditional random fields. We question the efficacy of this choice: The Markov assumption in the model prohibits it from making joint decisions about the begin, end, and internal context of a quotation. We perform an extensive analysis with two new model architectures. We find that (a), simple boundary classification combined with a greedy prediction strategy is competitive with the state of the art; (b), a semi-Markov model significantly outperforms all others, by relaxing the Markov assumption.},
  interhash = {c77dfb02001fe26838c9936221ace71a},
  intrahash = {fbeab4234e533692e6d7e938fccff533}
}
% European Marketing Academy 2024 conference paper on B2B social media posts.
@inproceedings{schmid24:_how,
  author    = {Marla-Sophie Schmid and Christina Kühnl and Florian Omiecienski and Sebastian Padó},
  title     = {How can business-to-business salespeople get out more of their social media posts?},
  booktitle = {Proceedings of the Annual Conference of the European Marketing Academy},
  year      = 2024,
  address   = {Bucharest, Romania}
}
% EMNLP 2017 WASSA workshop paper on emotion annotation of a stance/sentiment corpus.
@inproceedings{schuff17:_annot_model_and_analy_of,
  author    = {Hendrik Schuff and Jeremy Barnes and Julian Mohme and Sebastian Padó and Roman Klinger},
  title     = {Annotation, Modelling and Analysis of Fine-Grained
Emotions on a Stance and Sentiment Detection Corpus},
  booktitle = {Proceedings of the EMNLP WASSA workshop},
  year      = 2017,
  pages     = {13--23},
  address   = {Copenhagen, Denmark},
  url       = {https://www.aclweb.org/anthology/W17-5203.pdf},
  keywords  = {workshop myown},
  abstract  = {There is a rich variety of data sets for sentiment analysis
(viz., polarity and subjectivity classification). For the more
challenging task of detecting discrete emotions following the
definitions of Ekman and Plutchik, however, there are much fewer
data sets, and notably no resources for the social media
domain. This paper contributes to closing this gap by extending the
\textit{SemEval 2016 stance and sentiment dataset} with emotion
annotation. We (a) analyse annotation reliability and annotation
merging; (b) investigate the relation between emotion annotation and
the other annotation layers (stance, sentiment); (c) report
modelling results as a baseline for future work.}
}
% DeriMo 2017 workshop paper on an extraction algorithm for CELEX.
@inproceedings{shafaei17:_towar_cross_lingual_compar_of_deriv_lexic,
  author    = {Elnaz Shafaei and Diego Frassinelli and Gabriella Lapesa and Sebastian Padó},
  title     = {Towards Cross-Lingual Comparability of Derivational Lexicons: An Extraction Algorithm for {CELEX}},
  booktitle = {Proceedings of the DeriMo workshop},
  year      = 2017,
  address   = {Milan, Italy},
  keywords  = {workshop myown}
}
% Journal article (LiLT, 2022) on multilingual frame identification.
@article{sikos22:_improv_multil_frame_ident_estim_frame_trans,
  author   = {Jen Sikos and Michael Roth and Sebastian Padó},
  title    = {Improving Multilingual Frame Identification by Estimating Frame Transferability},
  journal  = {Linguistic Issues in Language Technology},
  volume   = {19},
  year     = {2022},
  url      = {https://doi.org/10.33011/lilt.v19i.939},
  keywords = {article myown}
}
% Journal article (Constructions and Frames, 2018) on FrameNet's 'Using' relation.
@article{sikos17:_framen_using_relat_as_sourc_concep_parap,
  author   = {Jennifer Sikos and Sebastian Padó},
  title    = {{FrameNet}'s 'Using' Relation As Source of Concept-driven Paraphrases},
  journal  = {Constructions and Frames},
  volume   = {10},
  number   = {1},
  pages    = {38--60},
  year     = 2018,
  url      = {https://doi.org/10.1075/cf.00010.sik},
  keywords = {myown}
}
% COLING 2018 workshop paper comparing FrameNet frames across languages with embeddings.
% The abstract's dash is restored from mis-encoded bytes.
@inproceedings{sikos18:_using_embed_compar_framen_frames_acros_languag,
  author    = {Jennifer Sikos and Sebastian Padó},
  title     = {Using Embeddings to Compare {FrameNet} Frames Across Languages},
  booktitle = {Proceedings of the COLING Workshop on Linguistic Resources for Natural Language Processing},
  year      = 2018,
  pages     = {91--101},
  address   = {Santa Fe, NM},
  url       = {https://aclweb.org/anthology/W18-3813},
  keywords  = {workshop myown},
  abstract  = {Much of the recent interest in Frame Semantics is fueled by the substantial extent of its applicability across languages. At the same time, lexicographic studies have found that the applicability of individual frames can be diminished by cross-lingual divergences regarding polysemy, syntactic valency, and lexicalization. Due to the large effort involved in manual investigations, there are so far no broad-coverage resources with "problematic" frames for any language pair.
Our study investigates to what extent multilingual vector representations of frames learned from manually annotated corpora can address this need by serving as a wide coverage source for such divergences. We present a case study for the language pair English -- German using the FrameNet and SALSA corpora and find that inferences can be made about cross-lingual frame applicability using a vector space model.}
}
% IWCS 2019 paper on frame identification as categorization.
@inproceedings{sikos19:_frame_ident_categ,
  author    = {Jennifer Sikos and Sebastian Padó},
  title     = {Frame Identification as Categorization: Exemplars vs Prototypes in {Embeddingland}},
  booktitle = {Proceedings of IWCS},
  year      = 2019,
  address   = {Gothenburg, Sweden},
  url       = {https://aclweb.org/anthology/papers/W/W19/W19-0425/},
  keywords  = {conference myown},
  abstract  = {Categorization is a central capability of human cognition, and a number of theories have been developed to account for properties of categorization. Even though many tasks in semantics also involve categorization of some kind, theories of categorization do not play a major role in contemporary research in computational linguistics. This paper follows the idea that embedding-based models of semantics lend themselves well to being formulated in terms of classical categorization theories. The benefit is a space of model families that enables (a) the formulation of hypotheses about the impact of major design decisions, and (b) a transparent assessment of these decisions. We instantiate this idea on the task of frame-semantic frame identification. We define four models that cross two design variables: (a) the choice of prototype vs. exemplar categorization, corresponding to different degrees of generalization applied to the input, and (b) the presence vs. absence of a fine-tuning step, corresponding to generic vs. task-adaptive categorization. We find that for frame identification, generalization and task-adaptive categorization both yield substantial benefits. Our prototype-based, fine-tuned model, which combines the best choices over these variables, establishes a new state-of-the-art in frame identification.}
}
@inproceedings{spohr07:_induc_comput_lexic_corpus_syntac_seman_annot,
  author    = {Dennis Spohr and Aljoscha Burchardt and Sebastian Pad{ó} and Anette Frank and Ulrich Heid},
  title     = {Inducing a Computational Lexicon from a Corpus with Syntactic and Semantic Annotation},
  booktitle = {Proceedings of IWCS-7},
  address   = {Tilburg, The Netherlands},
  year      = {2007}
}
@inproceedings{stadelmaier19:_model_paths_explain_knowl_base_compl,
  author    = {Josua Stadelmaier and Sebastian Padó},
  title     = {Modeling Paths for Explainable Knowledge Base Completion},
  booktitle = {Proceedings of the ACL BlackboxNLP workshop},
  address   = {Florence, Italy},
  year      = 2019,
  keywords  = {workshop myown}
}
@inproceedings{stehwien-pado:2015,
  author    = {Sabrina Stehwien and Sebastian Pad{ó}},
  title     = {Generalization in Native Language Identification -- Learners versus Scientists},
  booktitle = {Proceedings of CLIC-IT},
  year      = 2015,
  pages     = {264--268},
  address   = {Trento, Italy}
}
@article{stehwien16:_model_nativ_languag_ident_acros_text_types,
  author    = {Stehwien, Sabrina and Pad{ó}, Sebastian},
  title     = {Native Language Identification Across Text Types: How Special Are Scientists?},
  journal   = {Italian Journal of Computational Linguistics},
  volume    = 2,
  number    = 1,
  pages     = {32--45},
  year      = 2016,
  url       = {http://www.ai-lc.it/IJCoL/v2n1/02_stehwien_pado.pdf},
  interhash = {93d1981652863ea03bfc38a3f76bff25},
  intrahash = {bd75ab2ce37d886dc97b17e1319a0fc0}
}
@inproceedings{thejas19:_text_joint_predic_numer_categ,
  author    = {Thejas, V and Gupta, Abhijeet and Pad{ó}, Sebastian},
  title     = {Text-based Joint Prediction of Numeric and Categorical Attributes of Entities in Knowledge Bases},
  booktitle = {Proceedings of RANLP},
  address   = {Varna, Bulgaria},
  year      = 2019,
  keywords  = {sys:relevantfor:tcl-ims conference myown},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2be9c6dd7d7ff0156cce6ef67f34efb60/sp},
  interhash = {aadafde536a389261411024934d94b82},
  intrahash = {be9c6dd7d7ff0156cce6ef67f34efb60},
  timestamp = {2019-10-17T17:40:19.000+0200}
}
@article{thill:_impor_of_rich_embod_in,
  author  = {Serge Thill and Sebastian Pad{ó} and Tom Ziemke},
  title   = {On the importance of a rich embodiment in the grounding of concepts: {P}erspectives from embodied cognitive science and computational linguistics},
  journal = {Topics in Cognitive Science},
  volume  = {6},
  number  = {3},
  pages   = {545--558},
  year    = {2014}
}
@inproceedings{todirascu12:_frenc_and_german_corpor_for,
  author    = {Amalia Todirascu and Sebastian Pad{ó} and Max Kisselew and Jennifer Krisch and Ulrich Heid},
  title     = {French and {G}erman corpora for audience-based text classification},
  booktitle = {Proceedings of LREC 2012},
  address   = {Istanbul, Turkey},
  year      = {2012}
}
@inproceedings{Troiano2020,
  author    = {Troiano, Enrica and Klinger, Roman and Pad{ó}, Sebastian},
  title     = {Lost in Back-Translation: Emotion Preservation in Neural Machine Translation},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics},
  pages     = {4340--4354},
  url       = {https://www.aclanthology.org/2020.coling-main.384},
  doi       = {10.18653/v1/2020.coling-main.384},
}
% NOTE(review): describes the same paper as the entry with key Troiano2020
% (same ACL Anthology id 2020.coling-main.384); both keys are kept so that
% existing citations continue to resolve. Title aligned with that entry.
@inproceedings{troiano20:_lost_back_trans,
  author    = {Troiano, Enrica and Klinger, Roman and Padó, Sebastian},
  title     = {Lost in Back-Translation: Emotion Preservation in Neural Machine Translation},
  booktitle = {Proceedings of COLING},
  address   = {Online},
  year      = 2020,
  url       = {https://www.aclweb.org/anthology/2020.coling-main.384/},
  keywords  = {conference myown},
  added-at  = {2020-09-30T20:30:13.000+0200},
  timestamp = {2020-12-07T16:40:50.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/22499813b5bc2b1640c364ca880e35c2a/sp},
  interhash = {440a181985303a1b360b4d383c68e792},
  intrahash = {2499813b5bc2b1640c364ca880e35c2a},
  abstract  = {Machine translation provides powerful methods to convert text between languages, and is therefore a technology enabling a multilingual world. An important part of communication, however, takes place at the non-propositional level (e.g., politeness, formality, emotions), and it is far from clear whether current MT methods properly translate this information. This paper investigates the specific hypothesis that the non-propositional level of emotions is at least partially lost in MT. We carry out a number of experiments in a back-translation setup and establish that (1) emotions are indeed partially lost during translation; (2) this tendency can be reversed almost completely with a simple re-ranking approach informed by an emotion classifier, taking advantage of diversity in the n-best list; (3) the re-ranking approach can also be applied to change emotions, obtaining a model for emotion style transfer. An in-depth qualitative analysis reveals that there are recurring linguistic changes through which emotions are toned down or amplified, such as change of modality.}
}
@article{troiano22:_relat_predic_argum_struc_emotion_text,
  author    = {Troiano, Enrica and Klinger, Roman and Pad{ó}, Sebastian},
  title     = {On the Relationship between Frames and Emotionality in Text},
  journal   = {Northern European Journal of Language Technology},
  volume    = 9,
  number    = 1,
  year      = 2023,
  url       = {https://doi.org/10.3384/nejlt.2000-1533.2023.4361},
  keywords  = {article myown},
  added-at  = {2023-08-14T13:14:25.000+0200},
  timestamp = {2023-09-15T16:25:24.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2a95eb77642d869e237535205ea710115/sp},
  interhash = {a4fcafe99c771852f92d63a1bb813ba1},
  intrahash = {a95eb77642d869e237535205ea710115}
}
@inproceedings{Troiano2019,
  author       = {Troiano, Enrica and Pad{ó}, Sebastian and Klinger, Roman},
  title        = {Crowdsourcing and Validating Event-focused Emotion Corpora for {G}erman and {E}nglish},
  booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  year         = {2019},
  address      = {Florence, Italy},
  publisher    = {Association for Computational Linguistics},
  pages        = {4005--4011},
  url          = {https://www.aclweb.org/anthology/P19-1391},
  pdf          = {https://www.romanklinger.de/publications/TroianoPadoKlingerNAACL2019.pdf},
  entrysubtype = {conf}
}
@inproceedings{Troiano2021,
  author    = {Troiano, Enrica and Pad{ó}, Sebastian and Klinger, Roman},
  title     = {Emotion Ratings: How Intensity, Annotation Confidence and Agreements are Entangled},
  booktitle = {Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {40--49},
  url       = {https://www.aclanthology.org/2021.wassa-1.5},
}
% NOTE(review): describes the same paper as the entry with key Troiano2021
% (same ACL Anthology id 2021.wassa-1.5), but that entry gives pages 40--49
% while this one gives 50--61 -- confirm which range is correct.
@inproceedings{troiano21:_entan_annot_confid_emotion_inten,
  author    = {Troiano, Enrica and Padó, Sebastian and Klinger, Roman},
  title     = {Emotion Ratings: How Intensity, Annotation Confidence and Agreements are Entangled},
  booktitle = {Proceedings of the EACL WASSA workshop},
  pages     = {50--61},
  year      = 2021,
  url       = {https://www.aclweb.org/anthology/2021.wassa-1.5},
  keywords  = {myown workshop},
  added-at  = {2021-02-19T16:56:48.000+0100},
  timestamp = {2021-04-21T15:33:12.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2f3ce79d1f08905eedef2e329c84860cf/sp},
  interhash = {e73d543dfb468d4838ec84529dde3831},
  intrahash = {f3ce79d1f08905eedef2e329c84860cf}
}
@inproceedings{utt2013curious,
  author    = {Utt, J. and Lenci, A. and Pad{ó}, S. and Zarcone, A.},
  title     = {The Curious Case of Metonymic Verbs: A Distributional Characterization},
  booktitle = {{Proceedings of the IWCS Workshop ``Towards A Formal Distributional Semantics''}},
  year      = {2013},
  address   = {Potsdam, Germany}
}
@inproceedings{utt11:_ontol_based_distin_between_polys_and_homon,
  author    = {Jason Utt and Sebastian Pad{ó}},
  title     = {Ontology-based Distinction between Polysemy and Homonymy},
  booktitle = {Proceedings of IWCS 2011},
  address   = {Oxford, UK},
  year      = {2011}
}
@inproceedings{utt12:_distr_memor_german,
  author    = {Jason Utt and Sebastian Pad{ó}},
  title     = {A Distributional Memory for {G}erman},
  booktitle = {Proceedings of the KONVENS workshop on recent developments and applications of lexical-semantic resources},
  address   = {Vienna, Austria},
  year      = {2012}
}
@article{utt-pado:2014:tacl,
  author  = {Jason Utt and Sebastian Pad{ó}},
  title   = {Crosslingual and Multilingual Construction of Syntax-Based Vector Space Models},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2014},
  volume  = {2},
  pages   = {245--258}
}
@inproceedings{varvara16:_quant,
  author    = {Rossella Varvara and Gabriella Lapesa and Sebastian Padó},
  title     = {Quantifying regularity in morphological processes: An ongoing study on nominalization in {G}erman},
  booktitle = {ESSLLI DISSALT Workshop: Distributional Semantics and Semantic Theory},
  address   = {Bolzano, Italy},
  year      = 2016,
  keywords  = {myown abstract}
}
@article{varvara:_groun_seman_trans_in_contex,
  author    = {Varvara, Rossella and Lapesa, Gabriella and Padó, Sebastian},
  title     = {Grounding Semantic Transparency In Context: A Distributional Semantic Study on {G}erman Event Nominalizations},
  journal   = {Morphology},
  volume    = 31,
  pages     = {409--446},
  year      = 2021,
  url       = {https://doi.org/10.1007/s11525-021-09382-w},
  keywords  = {article myown},
  added-at  = {2021-04-22T09:47:08.000+0200},
  timestamp = {2021-10-25T09:36:36.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2710319e2b7cd0a488842da3889289927/sp},
  interhash = {3cb922fdbc49dea0b6529ec82e7a0133},
  intrahash = {710319e2b7cd0a488842da3889289927},
  abstract  = { We present the results of a large-scale corpus-based comparison of
two German event nominalization patterns: deverbal nouns in
-ung (e.g., die Evaluierung, 'the evaluation')
and nominal infinitives (e.g., das Evaluieren, 'the
evaluating'). Among the many available event nominalization
patterns for German, we selected these two because they are both
highly productive and challenging from the semantic point of view.
Both patterns are known to keep a tight relation with the event
denoted by the base verb, but with different nuances. Our study
targets a better understanding of the differences in their semantic
import.
The key notion of our comparison is that of semantic transparency,
and we propose a usage-based characterization of the relationship
between derived nominals and their bases. Using methods from
distributional semantics, we bring to bear two concrete measures of
transparency which highlight different nuances: the first one,
cosine, detects nominalizations which are semantically
similar to their bases; the second one, distributional
inclusion, detects nominalizations which are used in a subset of
the contexts of the base verb. We find that the inclusion measure
helps in characterizing the difference between the two types of
nominalizations, in relation with the traditionally considered
variable of relative frequency (Hay, 2001). We further benefit from
our distributional analysis to frame our comparison in the broader
coordinates of the inflection vs. derivation cline.}
}
@inproceedings{snajder:2013,
  author        = {Šnajder, Jan and Pad{ó}, Sebastian and Agić, Željko},
  title         = {Building and Evaluating a Distributional Memory for Croatian},
  booktitle     = {Proceedings of ACL},
  year          = {2013},
  address       = {Sofia, Bulgaria},
  date-added    = {2013-05-14 17:37:08 +0000},
  date-modified = {2013-05-14 17:39:22 +0000}
}
@article{westera:_word,
  author    = {Westera, Matthijs and Gupta, Abhijeet and Boleda, Gemma and Padó, Sebastian},
  title     = {Distributional models of category concepts based on names of category members},
  journal   = {Cognitive Science},
  volume    = 45,
  number    = 9,
  pages     = {e13029},
  year      = 2021,
  url       = {https://doi.org/10.1111/cogs.13029},
  keywords  = {article myown},
  added-at  = {2021-07-09T11:27:40.000+0200},
  timestamp = {2021-09-07T16:13:44.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2e7dc0d19377b7de5c332f392f55fe283/sp},
  interhash = {2672b0d253e761282b859ae438a6277e},
  intrahash = {e7dc0d19377b7de5c332f392f55fe283},
  abstract  = { Cognitive scientists have long used distributional semantic
representations of categories. The predominant approach uses
distributional representations of category-denoting nouns, like
"city" for the category city. We propose a novel scheme that
represents categories as prototypes over representations of
\textit{names} of its members, such as "Barcelona", "Mumbai",
and "Wuhan" for the category city. This name-based representation
empirically outperforms the noun-based representation on two
experiments (modelling human judgments of category relatedness and
predicting category membership) with particular improvements for
ambiguous nouns. We discuss the model complexity of both classes of
models and argue that the name-based model has superior explanatory
potential with regard to concept acquisition.}
}
@inproceedings{lapesa23:_polit,
  author    = {Zaberer, Urs and Pad{ó}, Sebastian and Lapesa, Gabriella},
  title     = {Political claim identification and categorization in a multilingual setting: {F}irst experiments},
  booktitle = {Proceedings of KONVENS},
  address   = {Ingolstadt, Germany},
  year      = 2023,
  url       = {https://aclanthology.org/2023.konvens-main.22.pdf},
  keywords  = {conference myown},
  added-at  = {2023-06-30T17:12:45.000+0200},
  timestamp = {2023-10-19T15:58:48.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/20744d03dc358ce83af99d8cb647ea0f2/sp},
  interhash = {67b44543e42d8858c71a0fc994c417fa},
  intrahash = {0744d03dc358ce83af99d8cb647ea0f2}
}
@inproceedings{zarcone2013fitting,
  author    = {Zarcone, A. and Lenci, A. and Pad{ó}, S. and Utt, J.},
  title     = {{Fitting, not clashing! A distributional semantic model of logical metonymy}},
  booktitle = {Proceedings of the 10th {International Conference on Computational Semantics}},
  address   = {Potsdam, Germany},
  year      = {2013},
  note      = {(presented as poster)}
}
@inproceedings{zarcone2010like,
  author    = {Zarcone, A. and Pad{ó}, S.},
  title     = {{``I like work: I can sit and look at it for hours'' -- Type clash vs. plausibility in covert event recovery}},
  booktitle = {{Proceedings of the Interdisciplinary Workshop on Verbs (Verb 2010)}},
  year      = {2010},
  pages     = {209--214},
  address   = {Pisa, Italy},
  note      = {(presented as poster)}
}
@misc{zarcone2011crowdsourcing,
  author       = {Zarcone, A. and Pad{ó}, S.},
  title        = {A crowdsourcing study of logical metonymy},
  howpublished = {{Talk at Crowdscientist 2011, Boulder, CO}},
  note         = {(abstract submission)},
  year         = {2011}
}
@inproceedings{zarcone2011generalized,
  author    = {Zarcone, A. and Pad{ó}, S.},
  title     = {Generalized event knowledge in logical metonymy resolution},
  booktitle = {Proceedings of the 33rd Annual Meeting of the Cognitive Science Society},
  year      = {2011},
  pages     = {944--949},
  address   = {Boston, MA},
  note      = {(presented as poster)}
}
@misc{zarcone2013disentangling,
  author       = {Zarcone, A. and Pad{ó}, S.},
  title        = {{Logical metonymy: Disentangling object type and thematic fit}},
  howpublished = {Poster presented at the 19th {Conference on Architectures and Mechanisms for Language Processing (AMLaP), Marseille, France}},
  note         = {(abstract submission)},
  year         = {2013}
}
@inproceedings{zarcone2012inferring,
  author    = {Zarcone, A. and Pad{ó}, S. and Lenci, A.},
  title     = {Inferring covert events in logical metonymies: a probe recognition experiment},
  booktitle = {{Proceedings of the 34th Annual Meeting of the Cognitive Science Society}},
  year      = {2012},
  pages     = {1215--1220},
  address   = {Sapporo, Japan}
}
@inproceedings{zarcone2012modeling,
  author    = {Zarcone, A. and Utt, J. and Pad{ó}, S.},
  title     = {Modeling covert event retrieval in logical metonymy: probabilistic and distributional accounts},
  booktitle = {Proceedings of the 3rd {Workshop on Cognitive Modeling and Computational Linguistics}},
  year      = {2012},
  pages     = {70--79},
  address   = {Montr{é}al, Canada}
}
@article{zarcone17:_compl_coerc,
  author   = {Alessandra Zarcone and Ken McRae and Alessandro Lenci and Sebastian Padó},
  title    = {Complement Coercion: The Joint Effects of Type and Typicality},
  journal  = {Frontiers in Psychology},
  volume   = 8,
  pages    = 1987,
  year     = 2017,
  url      = {https://doi.org/10.3389/fpsyg.2017.01987},
  keywords = {myown}
}
@article{zarcone2014logical,
  author  = {Alessandra Zarcone and Sebastian Pad{ó} and Alessandro Lenci},
  title   = {{Logical metonymy resolution in a words-as-cues framework: Evidence from self-paced reading and probe recognition}},
  journal = {Cognitive Science},
  volume  = {38},
  number  = {5},
  pages   = {973--996},
  year    = {2014},
  url     = {https://dx.doi.org/10.1111/cogs.12108}
}
@inproceedings{zarcone15:_same_same_differ,
  author    = {Alessandra Zarcone and Sebastian Pad{ó} and Alessandro Lenci},
  title     = {Same Same but Different: Type and Typicality in a Distributional Model of Complement Coercion},
  booktitle = {Proceedings of NetWords},
  year      = {2015},
  pages     = {91--94},
  address   = {Pisa, Italy}
}
@inproceedings{zeller12:_corpus_based_acquis_of_suppor,
  author    = {Britta Zeller and Sebastian Pad{ó}},
  title     = {Corpus-Based Acquisition of Support Verb Constructions for {P}ortuguese},
  booktitle = {Proceedings of PROPOR 2012},
  address   = {Coimbra, Portugal},
  year      = {2012}
}
@inproceedings{zeller13:_textual_entail_datas_german_web_forum_text,
  author    = {Britta Zeller and Sebastian Pad{ó}},
  title     = {A Textual Entailment Dataset from {G}erman Web Forum Text},
  booktitle = {Proceedings of IWCS},
  address   = {Potsdam, Germany},
  year      = {2013}
}
@inproceedings{zeller14:_towar_seman_valid_deriv_lexic,
  author    = {Britta Zeller and Sebastian Pad{ó} and Jan Šnajder},
  title     = {Towards Semantic Validation of a Derivational Lexicon},
  booktitle = {Proceedings of COLING},
  year      = {2014},
  pages     = {1728--1739},
  address   = {Dublin, Ireland},
  url       = {https://www.aclweb.org/anthology/C14-1163}
}
@inproceedings{zeller:2013,
  author        = {Zeller, Britta and Šnajder, Jan and Pad{ó}, Sebastian},
  title         = {{DErivBase}: Inducing and Evaluating a Derivational Morphology Resource for {G}erman},
  booktitle     = {Proceedings of ACL},
  year          = {2013},
  address       = {Sofia, Bulgaria},
  date-added    = {2013-05-14 17:37:08 +0000},
  date-modified = {2013-05-14 17:39:07 +0000}
}