BibTeX
% ICASSP 2022 conference paper. Typed as inproceedings so that the booktitle
% and organization fields are actually used (article would ignore them).
@inproceedings{Arora2022espnet,
  author       = {Arora, Siddhant and Dalmia, Siddharth and Denisov, Pavel and Chang, Xuankai and Ueda, Yushi and Peng, Yifan and Zhang, Yuekai and Kumar, Sujay and Ganesan, Karthik and Yan, Brian and Vu, Ngoc Thang and Black, Alan W and Watanabe, Shinji},
  title        = {{ESPnet-SLU}: Advancing Spoken Language Understanding through {ESPnet}},
  booktitle    = {2022 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  organization = {IEEE},
  year         = {2022}
}
% IWSLT 2021 proceedings paper: the venue is a conference, so it is stored
% in booktitle of an inproceedings entry rather than in journal.
@inproceedings{Denisov2021ims,
  author    = {Denisov, Pavel and Mager, Manuel and Vu, Ngoc Thang},
  title     = {{IMS}{'} Systems for the {IWSLT} 2021 Low-Resource Speech Translation Task},
  booktitle = {Proceedings of the 18th International Conference on Spoken Language Translation ({IWSLT} 2021)},
  pages     = {175--181},
  year      = {2021}
}
% Interspeech 2019 proceedings paper, stored as inproceedings with booktitle.
% The title is no longer wrapped in a second brace pair, so styles may recase it.
@inproceedings{Denisov2019end,
  author    = {Denisov, Pavel and Vu, Ngoc Thang},
  title     = {End-to-End Multi-Speaker Speech Recognition Using Speaker Embeddings and Transfer Learning},
  booktitle = {Proceedings of Interspeech 2019},
  pages     = {4425--4429},
  year      = {2019}
}
% The venue is a yearly volume with its own publisher, so it is stored as
% booktitle rather than journal. Publisher name and city are split into
% separate fields so styles can format them.
% NOTE(review): inproceedings is assumed from the volume title; switch to
% incollection if the volume is not conference proceedings.
@inproceedings{Denisov2019ims,
  author    = {Denisov, Pavel and Vu, Ngoc Thang},
  title     = {{IMS-speech}: A speech to text tool},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2019},
  pages     = {170--177},
  publisher = {TUDpress},
  address   = {Dresden},
  year      = {2019}
}
% Interspeech 2020 proceedings paper, stored as inproceedings with booktitle.
% The title is no longer wrapped in a second brace pair, so styles may recase it.
@inproceedings{Denisov2020pretrained,
  author    = {Denisov, Pavel and Vu, Ngoc Thang},
  title     = {Pretrained Semantic Speech Embeddings for End-to-End Spoken Language Understanding via Cross-Modal Teacher-Student Learning},
  booktitle = {Proceedings of Interspeech 2020},
  pages     = {881--885},
  year      = {2020}
}
% Paper in the 13th ITG-Symposium on Speech Communication.
% NOTE(review): the third author's surname is assumed to be the compound
% "Ferras Font" (given name Marc); the previous split "Font, Marc Ferras"
% would sort and abbreviate as "Font, M. F." - confirm against the
% publisher's author listing.
@inproceedings{Denisov2018unsupervised,
  author    = {Denisov, Pavel and Vu, Ngoc Thang and Ferras Font, Marc},
  title     = {Unsupervised domain adaptation by adversarial learning for robust speech recognition},
  booktitle = {Speech Communication; 13th ITG-Symposium},
  year      = {2018}
}
% Journal article. The ampersand in the journal name is escaped because a
% bare ampersand is a LaTeX alignment character and breaks typesetting.
% Only the language pair in the title is case-protected.
@article{Hamed2022investigations,
  author    = {Hamed, Injy and Denisov, Pavel and Li, Chia-Yu and Elmahdy, Mohamed and Abdennadher, Slim and Vu, Ngoc Thang},
  title     = {Investigations on speech recognition systems for low-resource dialectal {Arabic-English} code-switching speech},
  journal   = {Computer Speech \& Language},
  volume    = {72},
  year      = {2022},
  publisher = {Elsevier}
}
% ACL 2020 system demonstration paper, stored as inproceedings with booktitle.
% Umlauts use LaTeX accent groups so classic 8-bit BibTeX sorts and prints
% the names correctly.
@inproceedings{Li2020adviser,
  author    = {Li, Chia-Yu and Ortega, Daniel and V{\"a}th, Dirk and Lux, Florian and Vanderlyn, Lindsey and Schmidt, Maximilian and Neumann, Michael and V{\"o}lkel, Moritz and Denisov, Pavel and Jenne, Sabrina and Kacarevic, Zorica and Vu, Ngoc Thang},
  title     = {{ADVISER}: A Toolkit for Developing Multi-modal, Multi-domain and Socially-engaged Conversational Agents},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  pages     = {279--286},
  publisher = {Association for Computational Linguistics},
  year      = {2020}
}
% Blizzard Challenge 2023 system description. The system name and the
% challenge name are brace-protected so sentence-casing styles keep their
% capitals.
@inproceedings{LuxBlizzard2023,
  author    = {Lux, Florian and Koch, Julia and Meyer, Sarina and Bott, Thomas and Schauffler, Nadja and Denisov, Pavel and Schweitzer, Antje and Vu, Ngoc Thang},
  title     = {The {IMS Toucan} system for the {Blizzard Challenge} 2023},
  booktitle = {Blizzard Challenge 2023},
  year      = {2023}
}
% Interspeech 2022 paper. The page range uses a double hyphen so it is
% typeset as an en dash; author names are in unambiguous Last, First form.
@inproceedings{meyer22:speaker,
  author    = {Meyer, Sarina and Lux, Florian and Denisov, Pavel and Koch, Julia and Tilli, Pascal and Vu, Ngoc Thang},
  title     = {Speaker Anonymization with Phonetic Intermediate Representations},
  booktitle = {Proc. Interspeech 2022},
  year      = {2022},
  pages     = {4925--4929},
  address   = {Incheon, Korea},
  url       = {https://www.isca-speech.org/archive/interspeech_2022/meyer22b_interspeech}
}
% ICASSP 2023 paper. The page range uses a double hyphen; acronyms in the
% booktitle are brace-protected against recasing.
@inproceedings{10096607,
  author    = {Meyer, Sarina and Lux, Florian and Koch, Julia and Denisov, Pavel and Tilli, Pascal and Vu, Ngoc Thang},
  title     = {Prosody Is Not Identity: A Speaker Anonymization Approach Using Prosody Cloning},
  booktitle = {{ICASSP} 2023 - 2023 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2023},
  pages     = {1--5},
  doi       = {10.1109/ICASSP49357.2023.10096607}
}
% Paper in the SLT 2022 workshop proceedings (publication year recorded as
% 2023). The page range uses a double hyphen; author names are in
% Last, First form.
@inproceedings{meyer23:anonymizing,
  author    = {Meyer, Sarina and Tilli, Pascal and Denisov, Pavel and Lux, Florian and Koch, Julia and Vu, Ngoc Thang},
  title     = {Anonymizing Speech with Generative Adversarial Networks to Preserve Speaker Privacy},
  booktitle = {Proc. {IEEE} Spoken Language Technology Workshop ({SLT}) 2022},
  year      = {2023},
  pages     = {912--919},
  address   = {Doha, Qatar},
  url       = {https://arxiv.org/abs/2210.07002}
}
% VoicePrivacy 2022 Challenge system description. The acronym GAN and the
% camel-cased challenge name are brace-protected so sentence-casing styles
% do not lowercase them.
@inproceedings{meyer22:cascade,
  author    = {Meyer, Sarina and Tilli, Pascal and Lux, Florian and Denisov, Pavel and Koch, Julia and Vu, Ngoc Thang},
  title     = {Cascade of Phonetic Speech Recognition, Speaker Embeddings {GAN} and Multispeaker Speech Synthesis for the {VoicePrivacy} 2022 Challenge},
  booktitle = {Proc. 2nd Symposium on Security and Privacy in Speech Communication},
  year      = {2022},
  address   = {Incheon, Korea},
  url       = {https://www.isca-speech.org/archive/spsc_2022/meyer22_spsc}
}
% ICASSP 2019 paper. The page range uses a double hyphen so it is typeset
% as an en dash; acronyms in the booktitle are brace-protected.
@inproceedings{ortega2019context,
  author       = {Ortega, Daniel and Li, Chia-Yu and Vallejo, Gisela and Denisov, Pavel and Vu, Ngoc Thang},
  title        = {Context-aware neural-based dialog act classification on automatically generated transcriptions},
  booktitle    = {2019 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {7265--7269},
  year         = {2019},
  organization = {IEEE}
}
% Paper in the 2021 IEEE SLT workshop: the venue is a workshop, so it is
% stored in booktitle of an inproceedings entry rather than in journal.
@inproceedings{Raj2021integration,
  author    = {Raj, Desh and Denisov, Pavel and Chen, Zhuo and Erdogan, Hakan and Huang, Zili and He, Maokui and Watanabe, Shinji and Du, Jun and Yoshioka, Takuya and Luo, Yi and Kanda, Naoyuki and Li, Jinyu and Wisdom, Scott and Hershey, John R.},
  title     = {Integration of Speech Separation, Diarization, and Recognition for Multi-Speaker Meetings: System Description, Comparison, and Analysis},
  booktitle = {2021 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages     = {897--904},
  year      = {2021}
}