BibTeX

@inproceedings{arvivetal2021relation,
    title = "On the Relation between Syntactic Divergence and Zero-Shot Performance",
    author = "Arviv, Ofir  and
      Nikolaev, Dmitry  and
      Karidi, Taelin  and
      Abend, Omri",
    booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-main.394",
    pages = "4803-4817",
    abstract = "We explore the link between the extent to which syntactic relations are preserved in translation and the ease of correctly constructing a parse tree in a zero-shot setting. While previous work suggests such a relation, it tends to focus on the macro level and not on the level of individual edges{---}a gap we aim to address. As a test case, we take the transfer of Universal Dependencies (UD) parsing from English to a diverse set of languages and conduct two sets of experiments. In one, we analyze zero-shot performance based on the extent to which English source edges are preserved in translation. In another, we apply three linguistically motivated transformations to UD, creating more cross-lingually stable versions of it, and assess their zero-shot parsability. In order to compare parsing performance across different schemes, we perform extrinsic evaluation on the downstream task of cross-lingual relation extraction (RE) using a subset of a standard English RE benchmark translated to Russian and Korean. In both sets of experiments, our results suggest a strong relation between cross-lingual stability and zero-shot parsing performance.",
}

@inproceedings{ceron23:_addit,
  abstract = {Automatic extraction of party (dis)similarities from texts such as party election manifestos or parliamentary speeches plays an increasing role in computational political science. However, existing approaches are fundamentally limited to targeting only global party (dis)similarity: they condense the relationship between a pair of parties into a single figure, their similarity. In aggregating over all policy domains (e.g., health or foreign policy), they do not provide any qualitative insights into which domains parties agree or disagree on. This paper proposes a workflow for estimating policy domain aware party similarity that overcomes this limitation. The workflow covers (a) definition of suitable policy domains; (b) automatic labeling of domains, if no manual labels are available; (c) computation of domain-level similarities and aggregation at a global level; (d) extraction of interpretable party positions on major policy axes via multidimensional scaling. We evaluate our workflow on manifestos from the German federal elections. We find that our method (a) yields high correlation when predicting party similarity at a global level and (b) provides accurate party-specific positions, even with automatically labelled policy domains.},
  address = {Toronto, Canada},
  author = {Ceron, Tanise and Nikolaev, Dmitry and Pad{\'o}, Sebastian},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  title = {Additive manifesto decomposition: {A} policy domain aware method for understanding party positioning},
  url = {https://aclanthology.org/2023.findings-acl.499/},
  year = 2023
}

@inproceedings{moeller23:_attrib_method_siames_encod,
  abstract = {Despite the success of Siamese encoder models such as sentence transformers (ST), little is known about the aspects of inputs they pay attention to. A barrier is that their predictions cannot be attributed to individual features, as they compare two inputs rather than processing a single one. This paper derives a local attribution method for Siamese encoders by generalizing the principle of integrated gradients to models with multiple inputs. The solution takes the form of feature-pair attributions and can be reduced to a token-token matrix for STs. Our method involves the introduction of integrated Jacobians and inherits the advantageous formal properties of integrated gradients: it accounts for the model's full computation graph and is guaranteed to converge to the actual prediction. A pilot study shows that, in an ST, a few token pairs can often explain large fractions of predictions and that the model focuses on nouns and verbs. For accurate predictions, however, it needs to attend to the majority of tokens and parts of speech.},
  address = {Singapore},
  author = {M{\"o}ller, Lucas and Nikolaev, Dmitry and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  note = {To appear},
  title = {An Attribution Method for Siamese Encoders},
  url = {https://arxiv.org/pdf/2310.05703.pdf},
  year = 2023
}

@article{nikolaev2022gaps,
  title = {A typology of consonant-inventory gaps},
  author = {Dmitry Nikolaev},
  journal = {Linguistic Typology},
  volume = {26},
  number = {1},
  pages = {161--186},
  year = {2022},
  doi = {10.1515/lingty-2020-0128},
  url = {https://doi.org/10.1515/lingty-2020-0128},
}

@article{nikolaev2023bootstrap,
  title = {Bootstrap co-occurrence networks of consonants and the Basic Consonant Inventory},
  author = {Dmitry Nikolaev},
  journal = {Linguistic Typology},
  volume = {27},
  number = {2},
  pages = {363--380},
  year = {2023},
  doi = {10.1515/lingty-2022-0036},
  url = {https://doi.org/10.1515/lingty-2022-0036},
}

@inproceedings{nikolaev23:_adver,
  abstract = {This paper begins with the premise that adverbs are neglected in computational linguistics. This view derives from two analyses: a literature review and a novel adverb dataset to probe a state-of-the-art language model, thereby uncovering systematic gaps in accounts of adverb meaning. We suggest that using Frame Semantics for characterizing word meaning, as in FrameNet, provides a promising approach to adverb analysis, given its ability to describe ambiguity, semantic roles, and null instantiation.},
  address = {Toronto, Canada},
  author = {Nikolaev, Dmitry and Baker, Collin and Petruck, Miriam R. L. and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)},
  title = {Adverbs, surprisingly},
  url = {https://aclanthology.org/2023.starsem-1.44.pdf},
  year = 2023
}

@inproceedings{nikolaev23:_multil,
  abstract = {Scaling analysis is a technique in computational political science that assigns a political actor (e.g. politician or party) a score on a predefined scale based on a (typically long) body of text (e.g. a parliamentary speech or an election manifesto). For example, political scientists have often used the left-right scale to systematically analyse political landscapes of different countries. NLP methods for automatic scaling analysis can find broad application provided they (i) are able to deal with long texts and (ii) work robustly across domains and languages. In this work, we implement and compare two approaches to automatic scaling analysis of political-party manifestos: label aggregation, a pipeline strategy relying on annotations of individual statements from the manifestos, and long-input-Transformer-based models, which compute scaling values directly from raw text. We carry out the analysis of the Comparative Manifestos Project dataset across 41 countries and 27 languages and find that the task can be efficiently solved by state-of-the-art models, with label aggregation producing the best results.},
  address = {Singapore},
  author = {Nikolaev, Dmitry and Ceron, Tanise and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  note = {To appear},
  title = {Multilingual estimation of political-party positioning: From label aggregation to long-input Transformers},
  url = {https://arxiv.org/abs/2310.12575},
  year = 2023
}

@inproceedings{nikolaev-pado-2022-word, 
    title = "Word-order Typology in Multilingual {BERT}: A Case Study in Subordinate-Clause Detection", 
    author = "Nikolaev, Dmitry  and Pad{ó}, Sebastian", 
    booktitle = "Proceedings of the 4th Workshop on Research in Computational Linguistic Typology and Multilingual NLP", 
    month = jul, 
    year = "2022", 
    address = "Seattle, Washington", 
    publisher = "Association for Computational Linguistics", 
    url = "https://aclanthology.org/2022.sigtyp-1.2", 
    doi = "10.18653/v1/2022.sigtyp-1.2", 
    pages = "11-21", 
    abstract = "The capabilities and limitations of BERT and similar models are still unclear when it comes to learning syntactic abstractions, in particular across languages. In this paper, we use the task of subordinate-clause detection within and across languages to probe these properties. We show that this task is deceptively simple, with easy gains offset by a long tail of harder cases, and that BERT{'}s zero-shot performance is dominated by word-order effects, mirroring the SVO/VSO/SOV typology.", 
} 

@inproceedings{nikolaev23:_argadj,
  abstract = {The distinction between arguments and adjuncts is a fundamental
  assumption of several linguistic theories. In this study, we investigate to
  what extent this distinction is picked up by a Transformer-based
  language model. We use BERT as a case study, operationalizing
  arguments and adjuncts as core and non-core FrameNet frame elements,
  respectively, and tying them to activations of
  particular BERT neurons.
  We present evidence, from English and Korean, that BERT learns more
  dedicated representations for arguments than for adjuncts when
  fine-tuned on the FrameNet frame-identification task. We also show that
  this distinction is already present in a weaker form in the vanilla
  pre-trained model.},
  address = {Nancy, France},
  author = {Nikolaev, Dmitry and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 15th International Conference on Computational Semantics (IWCS)},
  title = {The argument-adjunct distinction in {BERT}: A {FrameNet}-based investigation},
  url = {https://iwcs.pimoid.fr/2.pdf},
  year = 2023
}

@inproceedings{nikolaev23:_inves_trans,
  abstract = { The question of what kinds of linguistic information are encoded in different layers
    of Transformer-based language models is of considerable interest for the NLP community.
    Existing work, however, has overwhelmingly focused on word-level representations and
    encoder-only language models with the masked-token training objective.
    In this paper, we present experiments with semantic structural probing,
    a method for studying sentence-level representations
    via finding a subspace of the embedding space that provides
    suitable task-specific pairwise distances between data-points.
    We apply our method to language models from different families (encoder-only, decoder-only,
    encoder-decoder) and of different sizes in the context of two tasks, semantic textual similarity
    and natural-language inference. We find that model families differ substantially in their
    performance and layer dynamics, but that the results are largely model-size invariant.},
  address = {Singapore},
  author = {Nikolaev, Dmitry and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 6th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
  note = {To appear},
  title = {Investigating semantic subspaces of Transformer sentence embeddings through linear structural probing},
  url = {https://arxiv.org/abs/2310.11923},
  year = 2023
}

@inproceedings{nikolaev23:_repres,
  abstract = {Variants of the BERT architecture specialised for producing full-sentence representations often achieve better performance on downstream tasks than sentence embeddings extracted from vanilla BERT. However, there is still little understanding of what properties of inputs determine the properties of such representations. In this study, we construct several sets of sentences with pre-defined lexical and syntactic structures and show that SOTA sentence transformers have a strong nominal-participant-set bias: cosine similarities between pairs of sentences are more strongly determined by the overlap in the set of their noun participants than by having the same predicates, lengthy nominal modifiers, or adjuncts. At the same time, the precise syntactic-thematic functions of the participants are largely irrelevant.},
  address = {Dubrovnik, Croatia},
  author = {Nikolaev, Dmitry and Pad{\'o}, Sebastian},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
  title = {Representation biases in sentence transformers},
  url = {https://aclanthology.org/2023.eacl-main.268},
  year = 2023
}

@inproceedings{nikolaev23:_universe,
	abstract = {It has been argued that BERT ``rediscovers the traditional NLP
	pipeline'', with lower layers extracting morphosyntactic features and
	higher layers creating holistic sentence-level representations.
	In this paper, we critically examine this assumption through a
	principal-component-guided analysis, extracting sets of inputs that
	correspond to specific activation patterns in BERT sentence representations.
	We find that even in higher layers, the model mostly picks up on a
	variegated bunch of low-level features, many related to sentence
	complexity, that presumably arise from its specific pre-training
	objectives.},
	address = {Nancy, France},
	author = {Nikolaev, Dmitry and Pad{\'o}, Sebastian},
	booktitle = {Proceedings of the 15th International Conference on Computational Semantics (IWCS)},
	title = {The Universe of Utterances According to {BERT}},
	url = {https://iwcs.pimoid.fr/60.pdf},
	year = 2023
}