BibTeX

@InProceedings{adel2018domainindependent,
  address = {Brussels, Belgium},
  author = {Adel, Heike and Bostan, Laura Ana Maria and Papay, Sean and Padó, Sebastian and Klinger, Roman},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  publisher = {Association for Computational Linguistics},
  title = {{DERE}: A task and domain-independent slot filling framework for declarative relation extraction},
  abstract = {Most machine learning systems for natural language processing are tailored to specific tasks. As a result, comparability of models across tasks is missing, and their applicability to new tasks is limited. This affects end users without machine learning experience as well as model developers. To address these limitations, we present DeRe, a novel framework for declarative specification and compilation of template-based information extraction. It uses a generic specification language for the task and for data annotations in terms of spans and frames. This formalism enables the representation of a large variety of natural language processing challenges. The backend can be instantiated by different models, following different paradigms. The clear separation of frame specification and model backend will ease the implementation of new models and the evaluation of different models across different tasks. Furthermore, it simplifies transfer learning, joint learning across tasks and/or domains, as well as the assessment of model generalizability. DeRe is available as open source.},
  year = {2018},
  url = {https://aclweb.org/anthology/D18-2008.pdf},
  pages = {42--47}
}

@InProceedings{kaiser-etal-2020-IMS,
  address = {Barcelona (online)},
  author = {Kaiser, Jens and Schlechtweg, Dominik and Papay, Sean and {Schulte im Walde}, Sabine},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  publisher = {Association for Computational Linguistics},
  title = {{IMS} at {S}em{E}val-2020 {T}ask 1: {How low can you go? Dimensionality in Lexical Semantic Change Detection}},
  year = {2020}
}

@InProceedings{Papay2020,
  author = {Sean Papay and Roman Klinger and Sebastian Padó},
  title = {Dissecting Span Identification Tasks with Performance Prediction},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing},
  address = {Online},
  year = {2020},
  publisher = {Association for Computational Linguistics}
}

@InProceedings{Papay2022,
  title = {Constraining Linear-chain {CRF}s to Regular Languages},
  author = {Sean Papay and Roman Klinger and Sebastian Padó},
  booktitle = {International Conference on Learning Representations},
  year = {2022},
  url = {https://openreview.net/forum?id=jbrgwbv8nD},
  eprint = {2106.07306},
  archiveprefix = {arXiv}
}

@InProceedings{papay17:_evaluat_and_improv_deriv_lexic,
  author = {Sean Papay and Gabriella Lapesa and Sebastian Padó},
  title = {Evaluating and Improving a Derivational Lexicon with Graph-theoretical Methods},
  booktitle = {Proceedings of the Workshop on Resources and Tools for Derivational Morphology (DeriMo)},
  year = {2017},
  address = {Milan, Italy}
}

@InProceedings{papay19:_quotat_detec_class_corpus_agnos_model,
  address = {Varna, Bulgaria},
  author = {Papay, Sean and Padó, Sebastian},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP)},
  title = {Quotation Detection and Classification with a Corpus-Agnostic Model},
  year = {2019}
}

@InProceedings{papay2020riqua,
  abstract = {We introduce RiQuA (RIch QUotation Annotations), a corpus that provides quotations, including their interpersonal structure (speakers and addressees), for English literary text. The corpus comprises 11 works of 19th-century literature that were manually doubly annotated for direct and indirect quotations. For each quotation, its span, speaker, addressee, and cue are identified (if present). This provides a rich view of dialogue structures not available from other corpora. We detail the process of creating this dataset, discuss the annotation guidelines, and analyze the resulting corpus in terms of inter-annotator agreement and its properties. RiQuA, along with its annotation guidelines and associated scripts, is publicly available for use, modification, and experimentation.},
  address = {Marseille, France},
  author = {Papay, Sean and Padó, Sebastian},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference (LREC)},
  pages = {835--841},
  title = {{RiQuA}: A Corpus of Rich Quotation Annotation for English Literary Text},
  url = {https://www.aclweb.org/anthology/2020.lrec-1.104},
  year = {2020}
}

@InProceedings{papay18:_addres_low_resour_scenar_charac_embed,
  author = {Sean Papay and Sebastian Padó and {Ngoc Thang} Vu},
  title = {Addressing Low-Resource Scenarios with Character-aware Embeddings},
  booktitle = {Proceedings of the NAACL Workshop on Subword and Character Level Models},
  url = {https://aclweb.org/anthology/W18-1204.pdf},
  abstract = {Most modern approaches to computing word embeddings assume the availability of text corpora with billions of words. In this paper, we explore a setup where only corpora with millions of words are available, and many words in any new text are out of vocabulary. This setup is both of practical interest (modeling the situation for specific domains and low-resource languages) and of psycholinguistic interest, since it corresponds much more closely to the actual experiences and challenges of human language learning and use. We evaluate skip-gram word embeddings and two types of character-based embeddings on word relatedness prediction. On large corpora, performance of both model types is equal for frequent words, but character awareness already helps for infrequent words. Consistently, on small corpora, the character-based models perform overall better than skip-grams. The concatenation of different embeddings performs best on small corpora and robustly on large corpora.},
  year = {2018},
  address = {New Orleans, Louisiana}
}