% publications.bib
@misc{ferrando2024primerinnerworkingstransformerbased,
title={A Primer on the Inner Workings of Transformer-based Language Models},
author={Javier Ferrando and Gabriele Sarti and Arianna Bisazza and Marta R. Costa-jussà},
year={2024},
eprint={2405.00208},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2405.00208},
}
@misc{tsiamas2024pushinglimitszeroshotendtoend,
title={Pushing the Limits of Zero-shot End-to-End Speech Translation},
author={Ioannis Tsiamas and Gerard I. Gállego and José A. R. Fonollosa and Marta R. Costa-jussà},
year={2024},
eprint={2402.10422},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2402.10422},
}
@inproceedings{alastruey-etal-2024-speechalign-framework,
title = "{S}peech{A}lign: A Framework for Speech Translation Alignment Evaluation",
author = "Alastruey, Belen and
Sant, Aleix and
G{\'a}llego, Gerard I. and
Dale, David and
Costa-juss{\`a}, Marta R.",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1316",
pages = "15137--15146",
abstract = "Speech-to-Speech and Speech-to-Text translation are currently dynamic areas of research. In our commitment to advance these fields, we present SpeechAlign, a framework designed to evaluate the underexplored field of source-target alignment in speech models. The SpeechAlign framework has two core components. First, to tackle the absence of suitable evaluation datasets, we introduce the Speech Gold Alignment dataset, built upon a English-German text translation gold alignment dataset. Secondly, we introduce two novel metrics, Speech Alignment Error Rate (SAER) and Time-weighted Speech Alignment Error Rate (TW-SAER), which enable the evaluation of alignment quality within speech models. While the former gives equal importance to each word, the latter assigns weights based on the length of the words in the speech signal. By publishing SpeechAlign we provide an accessible evaluation framework for model assessment, and we employ it to benchmark open-source Speech Translation models. In doing so, we contribute to the ongoing research progress within the fields of Speech-to-Speech and Speech-to-Text translation.",
}
@inproceedings{costa-jussa-etal-2023-toxicity,
title = "Toxicity in Multilingual Machine Translation at Scale",
author = "Costa-juss{\`a}, Marta and
Smith, Eric and
Ropers, Christophe and
Licht, Daniel and
Maillard, Jean and
Ferrando, Javier and
Escolano, Carlos",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.642",
doi = "10.18653/v1/2023.findings-emnlp.642",
pages = "9570--9586",
abstract = "Machine Translation systems can produce different types of errors, some of which are characterized as critical or catastrophic due to the specific negative impact that they can have on users. In this paper we focus on one type of critical error: added toxicity. We evaluate and analyze added toxicity when translating a large evaluation dataset (HOLISTICBIAS, over 472k sentences, covering 13 demographic axes) from English into 164 languages. An automatic toxicity evaluation shows that added toxicity across languages varies from 0{\%} to 5{\%}. The output languages with the most added toxicity tend to be low-resource ones, and the demographic axes with the most added toxicity include sexual orientation, gender and sex, and ability. We also perform human evaluation on a subset of 8 translation directions, confirming the prevalence of true added toxicity. We use a measurement of the amount of source contribution to the translation, where a low source contribution implies hallucination, to interpret what causes toxicity. Making use of the input attributions allows us to explain toxicity, because the source contributions significantly correlate with toxicity for 84{\%} of languages studied. Given our findings, our recommendations to reduce added toxicity are to curate training data to avoid mistranslations, mitigate hallucination and check unstable translations.",
}
@inproceedings{tsiamas-etal-2023-segaugment,
title = "{S}eg{A}ugment: Maximizing the Utility of Speech Translation Data with Segmentation-based Augmentations",
author = "Tsiamas, Ioannis and
Fonollosa, Jos{\'e} and
Costa-juss{\`a}, Marta",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.574",
doi = "10.18653/v1/2023.findings-emnlp.574",
pages = "8569--8588",
abstract = "End-to-end Speech Translation is hindered by a lack of available data resources. While most of them are based on documents, a sentence-level version is available, which is however single and static, potentially impeding the usefulness of the data. We propose a new data augmentation strategy, SegAugment, to address this issue by generating multiple alternative sentence-level versions of a dataset. Our method utilizes an Audio Segmentation system, which re-segments the speech of each document with different length constraints, after which we obtain the target text via alignment methods. Experiments demonstrate consistent gains across eight language pairs in MuST-C, with an average increase of 2.5 BLEU points, and up to 5 BLEU for low-resource scenarios in mTEDx. Furthermore, when combined with a strong system, SegAugment obtains state-of-the-art results in MuST-C. Finally, we show that the proposed method can also successfully augment sentence-level datasets, and that it enables Speech Translation models to close the gap between the manual and automatic segmentation at inference time.",
}
@inproceedings{ferrando-etal-2023-automating,
title = "Automating Behavioral Testing in Machine Translation",
author = "Ferrando, Javier and
Sperber, Matthias and
Setiawan, Hendra and
Telaar, Dominic and
Hasan, Sa{\v{s}}a",
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.97",
doi = "10.18653/v1/2023.wmt-1.97",
pages = "1014--1030",
abstract = "Behavioral testing in NLP allows fine-grained evaluation of systems by examining their linguistic capabilities through the analysis of input-output behavior. Unfortunately, existing work on behavioral testing in Machine Translation (MT) is currently restricted to largely handcrafted tests covering a limited range of capabilities and languages. To address this limitation, we propose to use Large Language Models (LLMs) to generate a diverse set of source sentences tailored to test the behavior of MT models in a range of situations. We can then verify whether the MT model exhibits the expected behavior through matching candidate sets that are also generated using LLMs. Our approach aims to make behavioral testing of MT systems practical while requiring only minimal human effort. In our experiments, we apply our proposed evaluation framework to assess multiple available MT systems, revealing that while in general pass-rates follow the trends observable from traditional accuracy-based metrics, our method was able to uncover several important differences and potential bugs that go unnoticed when relying only on accuracy.",
}
@misc{carrino2023promotinggeneralizedcrosslingualquestion,
title={Promoting Generalized Cross-lingual Question Answering in Few-resource Scenarios via Self-knowledge Distillation},
author={Casimiro Pio Carrino and Carlos Escolano and José A. R. Fonollosa},
year={2023},
eprint={2309.17134},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2309.17134},
}
@misc{voita2023neuronslargelanguagemodels,
title={Neurons in Large Language Models: Dead, N-gram, Positional},
author={Elena Voita and Javier Ferrando and Christoforos Nalmpantis},
year={2023},
eprint={2309.04827},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2309.04827},
}
@inproceedings{sant23_interspeech,
author={Gerard Sant and Carlos Escolano},
title={{Analysis of Acoustic information in End-to-End Spoken Language Translation}},
year=2023,
booktitle={Proc. INTERSPEECH 2023},
pages={52--56},
doi={10.21437/Interspeech.2023-2050},
issn={2958-1796}
}
@inproceedings{torrero-etal-2023-talp,
title = "{TALP}-{UPC} at {P}rob{S}um 2023: Fine-tuning and Data Augmentation Strategies for {NER}",
author = "Torrero, Neil and
Sant, Gerard and
Escolano, Carlos",
editor = "Demner-fushman, Dina and
Ananiadou, Sophia and
Cohen, Kevin",
booktitle = "The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bionlp-1.48",
doi = "10.18653/v1/2023.bionlp-1.48",
pages = "497--502",
abstract = "This paper describes the submission of the TALP-UPC team to the Problem List Summarization task from the BioNLP 2023 workshop. This task consists of automatically extracting a list of health issues from the e-health medical record of a given patient. Our submission combines additional steps of data annotationwith finetuning of BERT pre-trained language models. Our experiments focus on the impact of finetuning on different datasets as well as the addition of data augmentation techniques to delay overfitting.",
}
@inproceedings{tsiamas-etal-2023-speech,
title = "Speech Translation with Foundation Models and Optimal Transport: {UPC} at {IWSLT}23",
author = "Tsiamas, Ioannis and
      G{\'a}llego, Gerard I. and
      Fonollosa, Jos{\'e} A. R. and
      Costa-juss{\`a}, Marta R.",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.iwslt-1.38",
doi = "10.18653/v1/2023.iwslt-1.38",
pages = "397--410",
abstract = "This paper describes the submission of the UPC Machine Translation group to the IWSLT 2023 Offline Speech Translation task. Our Speech Translation systems utilize foundation models for speech (wav2vec 2.0) and text (mBART50). We incorporate a Siamese pretraining step of the speech and text encoders with CTC and Optimal Transport, to adapt the speech representations to the space of the text model, thus maximizing transfer learning from MT. After this pretraining, we fine-tune our system end-to-end on ST, with Cross Entropy and Knowledge Distillation. Apart from the available ST corpora, we create synthetic data with SegAugment to better adapt our models to the custom segmentations of the IWSLT test sets. Our best single model obtains 31.2 BLEU points on MuST-C tst-COMMON, 29.8 points on IWLST.tst2020 and 33.4 points on the newly released IWSLT.ACLdev2023.",
}
@inproceedings{ferrando-etal-2023-explaining,
title = "Explaining How Transformers Use Context to Build Predictions",
author = "Ferrando, Javier and
G{\'a}llego, Gerard I. and
Tsiamas, Ioannis and
Costa-juss{\`a}, Marta R.",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.301",
doi = "10.18653/v1/2023.acl-long.301",
pages = "5486--5513",
abstract = "Language Generation Models produce words based on the previous context. Although existing methods offer input attributions as explanations for a model{'}s prediction, it is still unclear how prior words affect the model{'}s decision throughout the layers. In this work, we leverage recent advances in explainability of the Transformer and present a procedure to analyze models for language generation. Using contrastive examples, we compare the alignment of our explanations with evidence of the linguistic phenomena, and show that our method consistently aligns better than gradient-based and perturbation-based baselines. Then, we investigate the role of MLPs inside the Transformer and show that they learn features that help the model predict words that are grammatically acceptable. Lastly, we apply our method to Neural Machine Translation models, and demonstrate that they generate human-like source-target alignments for building predictions.",
}
@inproceedings{10208355,
author={Tarrés, Laia and Gállego, Gerard I. and Duarte, Amanda and Torres, Jordi and Giró-i-Nieto, Xavier},
booktitle={2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
title={Sign Language Translation from Instructional Videos},
year={2023},
volume={},
number={},
  pages={5625--5635},
keywords={Computer vision;Codes;Computational modeling;Conferences;Gesture recognition;Assistive technologies;Benchmark testing},
doi={10.1109/CVPRW59228.2023.00596}}
@inproceedings{10095276,
author={Tsiamas, Ioannis and Gállego, Gerard I. and Fonollosa, José A. R. and Costa-jussà, Marta R.},
booktitle={ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={Efficient Speech Translation with Dynamic Latent Perceivers},
year={2023},
volume={},
number={},
pages={1-5},
keywords={Training;Costs;Computational modeling;Computer architecture;Signal processing;Transformers;Boosting;Speech Translation;Efficiency;Perceiver},
doi={10.1109/ICASSP49357.2023.10095276},
}
@misc{gilabert2023resetoxrelearningattentionweights,
title={ReSeTOX: Re-learning attention weights for toxicity mitigation in machine translation},
      author={Javier García Gilabert and Carlos Escolano and Marta R. Costa-jussà},
year={2023},
eprint={2305.11761},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2305.11761},
}
@inproceedings{costajussa22occgen,
title = {{OccGen: Selection of Real-world Multilingual Parallel Data Balanced in Gender within Occupations}},
author = {Marta R. Costa-juss{\`a} and
Christine Basta and
Oriol Domingo and
Andr{\'e} Niyongabo Rubungo},
booktitle = {Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
month = dec,
year = {2022},
url = {https://openreview.net/forum?id=tTPVefaATp6}
}
@inproceedings{tarres-etal-2022-tackling,
title = "Tackling Low-Resourced Sign Language Translation: {UPC} at {WMT}-{SLT} 22",
author = "Tarres, Laia and
G{\'a}llego, Gerard I. and
Giro-i-nieto, Xavier and
Torres, Jordi",
editor = {Koehn, Philipp and
Barrault, Lo{\"\i}c and
Bojar, Ond{\v{r}}ej and
Bougares, Fethi and
Chatterjee, Rajen and
Costa-juss{\`a}, Marta R. and
Federmann, Christian and
Fishel, Mark and
Fraser, Alexander and
Freitag, Markus and
Graham, Yvette and
Grundkiewicz, Roman and
Guzman, Paco and
Haddow, Barry and
Huck, Matthias and
Jimeno Yepes, Antonio and
Kocmi, Tom and
Martins, Andr{\'e} and
Morishita, Makoto and
Monz, Christof and
Nagata, Masaaki and
Nakazawa, Toshiaki and
Negri, Matteo and
N{\'e}v{\'e}ol, Aur{\'e}lie and
Neves, Mariana and
Popel, Martin and
Turchi, Marco and
Zampieri, Marcos},
booktitle = "Proceedings of the Seventh Conference on Machine Translation (WMT)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wmt-1.97",
pages = "994--1000",
abstract = "This paper describes the system developed at the Universitat Polit{\`e}cnica de Catalunya for the Workshop on Machine Translation 2022 Sign Language Translation Task, in particular, for the sign-to-text direction. We use a Transformer model implemented with the Fairseq modeling toolkit. We have experimented with the vocabulary size, data augmentation techniques and pretraining the model with the PHOENIX-14T dataset. Our system obtains 0.50 BLEU score for the test set, improving the organizers{'} baseline by 0.38 BLEU. We remark the poor results for both the baseline and our system, and thus, the unreliability of our findings.",
}
@inproceedings{ferrando-etal-2022-towards,
title = "Towards Opening the Black Box of Neural Machine Translation: Source and Target Interpretations of the Transformer",
author = "Ferrando, Javier and
G{\'a}llego, Gerard I. and
Alastruey, Belen and
Escolano, Carlos and
Costa-juss{\`a}, Marta R.",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.599",
doi = "10.18653/v1/2022.emnlp-main.599",
pages = "8756--8769",
abstract = "In Neural Machine Translation (NMT), each token prediction is conditioned on the source sentence and the target prefix (what has been previously translated at a decoding step). However, previous work on interpretability in NMT has mainly focused solely on source sentence tokens{'} attributions. Therefore, we lack a full understanding of the influences of every input token (source sentence and target prefix) in the model predictions. In this work, we propose an interpretability method that tracks input tokens{'} attributions for both contexts. Our method, which can be extended to any encoder-decoder Transformer-based model, allows us to better comprehend the inner workings of current NMT models. We apply the proposed method to both bilingual and multilingual Transformers and present insights into their behaviour.",
}
@inproceedings{ferrando2022measuring,
title = "Measuring the Mixing of Contextual Information in the Transformer",
author = "Ferrando, Javier and
G{\'a}llego, Gerard I. and
Costa-juss{\`a}, Marta R.",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.595",
doi = "10.18653/v1/2022.emnlp-main.595",
pages = "8698--8714",
abstract = "The Transformer architecture aggregates input information through the self-attention mechanism, but there is no clear understanding of how this information is mixed across the entire model. Additionally, recent works have demonstrated that attention weights alone are not enough to describe the flow of information. In this paper, we consider the whole attention block {--}multi-head attention, residual connection, and layer normalization{--} and define a metric to measure token-to-token interactions within each layer. Then, we aggregate layer-wise interpretations to provide input attribution scores for model predictions. Experimentally, we show that our method, ALTI (Aggregation of Layer-wise Token-to-token Interactions), provides more faithful explanations and increased robustness than gradient-based methods.",
}
@inproceedings{tsiamas22_interspeech,
author={Ioannis Tsiamas and Gerard I. G{\'a}llego and Jos{\'e} A. R. Fonollosa and Marta R. Costa-juss{\`a}},
title={{SHAS: Approaching optimal Segmentation for End-to-End Speech Translation}},
month=sep,
year=2022,
booktitle={Proc. Interspeech 2022},
pages={106--110},
url={https://www.isca-speech.org/archive/interspeech_2022/tsiamas22_interspeech.html},
doi={10.21437/Interspeech.2022-59}
}
@inproceedings{sant2022multiformer,
title = "Multiformer: A Head-Configurable Transformer-Based Model for Direct Speech Translation",
author = "Sant, Gerard and
G{\'a}llego, Gerard I. and
Alastruey, Belen and
Costa-juss{\`a}, Marta Ruiz",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-srw.34",
pages = "277--284",
abstract = "Transformer-based models have been achieving state-of-the-art results in several fields of Natural Language Processing. However, its direct application to speech tasks is not trivial. The nature of this sequences carries problems such as long sequence lengths and redundancy between adjacent tokens. Therefore, we believe that regular self-attention mechanism might not be well suited for it. Different approaches have been proposed to overcome these problems, such as the use of efficient attention mechanisms. However, the use of these methods usually comes with a cost, which is a performance reduction caused by information loss. In this study, we present the Multiformer, a Transformer-based model which allows the use of different attention mechanisms on each head. By doing this, the model is able to bias the self-attention towards the extraction of more diverse token interactions, and the information loss is reduced. Finally, we perform an analysis of the head contributions, and we observe that those architectures where all heads relevance is uniformly distributed obtain better results. Our results show that mixing attention patterns along the different heads and layers outperforms our baseline by up to 0.7 BLEU.",
}
@inproceedings{costa-jussa-etal-2022-evaluating,
title = "Evaluating Gender Bias in Speech Translation",
author = "Costa-juss{\`a}, Marta R. and
Basta, Christine and
G{\'a}llego, Gerard I.",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.230",
pages = "2141--2147",
abstract = "The scientific community is increasingly aware of the necessity to embrace pluralism and consistently represent major and minor social groups. Currently, there are no standard evaluation techniques for different types of biases. Accordingly, there is an urgent need to provide evaluation sets and protocols to measure existing biases in our automatic systems. Evaluating the biases should be an essential step towards mitigating them in the systems. This paper introduces WinoST, a new freely available challenge set for evaluating gender bias in speech translation. WinoST is the speech version of WinoMT, an MT challenge set, and both follow an evaluation protocol to measure gender accuracy. Using an S-Transformer end-to-end speech translation system, we report the gender bias evaluation on four language pairs, and we reveal the inaccuracies in translations generating gender-stereotyped translations.",
}
@inproceedings{tsiamas-etal-2022-pretrained,
title = "Pretrained Speech Encoders and Efficient Fine-tuning Methods for Speech Translation: {UPC} at {IWSLT} 2022",
author = "Tsiamas, Ioannis and
G{\'a}llego, Gerard I. and
Escolano, Carlos and
Fonollosa, Jos{\'e} and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.iwslt-1.23",
pages = "265--276",
abstract = "This paper describes the submissions of the UPC Machine Translation group to the IWSLT 2022 Offline Speech Translation and Speech-to-Speech Translation tracks. The offline task involves translating English speech to German, Japanese and Chinese text. Our Speech Translation systems are trained end-to-end and are based on large pretrained speech and text models. We use an efficient fine-tuning technique that trains only specific layers of our system, and explore the use of adapter modules for the non-trainable layers. We further investigate the suitability of different speech encoders (wav2vec 2.0, HuBERT) for our models and the impact of knowledge distillation from the Machine Translation model that we use for the decoder (mBART). For segmenting the IWSLT test sets we fine-tune a pretrained audio segmentation model and achieve improvements of 5 BLEU compared to the given segmentation. Our best single model uses HuBERT and parallel adapters and achieves 29.42 BLEU at English-German MuST-C tst-COMMON and 26.77 at IWSLT 2020 test. By ensembling many models, we further increase translation quality to 30.83 BLEU and 27.78 accordingly. Furthermore, our submission for English-Japanese achieves 15.85 and English-Chinese obtains 25.63 BLEU on the MuST-C tst-COMMON sets. Finally, we extend our system to perform English-German Speech-to-Speech Translation with a pretrained Text-to-Speech model.",
}
@inproceedings{alastruey-etal-2022-locality,
title = "On the Locality of Attention in Direct Speech Translation",
author = "Alastruey, Belen and
Ferrando, Javier and
G{\'a}llego, Gerard I. and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-srw.32",
pages = "402--412",
abstract = "Transformers have achieved state-of-the-art results across multiple NLP tasks. However, the self-attention mechanism complexity scales quadratically with the sequence length, creating an obstacle for tasks involving long sequences, like in the speech domain. In this paper, we discuss the usefulness of self-attention for Direct Speech Translation. First, we analyze the layer-wise token contributions in the self-attention of the encoder, unveiling local diagonal patterns. To prove that some attention weights are avoidable, we propose to substitute the standard self-attention with a local efficient one, setting the amount of context used based on the results of the analysis. With this approach, our model matches the baseline performance, and improves the efficiency by skipping the computation of those weights that standard attention discards.",
}
@article{escolano2022multilingual,
title={Multilingual machine translation: Deep analysis of language-specific encoder-decoders},
  author={Escolano, Carlos and Costa-juss{\`a}, Marta Ruiz and Fonollosa, Jos{\'e} A. R.},
journal={Journal of Artificial Intelligence Research},
volume={73},
pages={1535--1552},
year={2022},
month=apr,
url = "https://www.jair.org/index.php/jair/article/view/12699",
}
@inproceedings{costajussa2022genderbias,
author={Marta Ruiz Costa-juss{\`a} and Carlos Escolano and Christine Basta and Javier Ferrando and Roser Batlle Roca and Ksenia Kharitonova},
title={Interpreting Gender Bias in Neural Machine Translation: Multilingual Architecture Matters},
  booktitle = {Proceedings of the 36th AAAI Conference on Artificial Intelligence},
month = feb,
year = "2022",
url={https://www.aaai.org/AAAI22Papers/AISI-2223.CostajussaM.pdf}
}
@misc{domingo2022multitask,
      title={A multi-task semi-supervised framework for Text2Graph \& Graph2Text},
author={Oriol Domingo and Marta R. Costa-jussà and Carlos Escolano},
year={2022},
month=feb,
journal={arXiv preprint arXiv:2202.06041},
url={https://arxiv.org/abs/2202.06041}
}
@inproceedings{escolano-etal-2021-enabling-zero-shot,
author={Escolano, Carlos and Costa-jussà, Marta R. and Fonollosa, José A. R. and Segura, Carlos},
booktitle={2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
title={Enabling Zero-Shot Multilingual Spoken Language Translation with Language-Specific Encoders and Decoders},
year={2021},
month=dec,
volume={},
number={},
  pages={694--701},
doi={10.1109/ASRU51503.2021.9688026},
url = "https://ieeexplore.ieee.org/document/9688026",
}
@inproceedings{rafieian-etal-2021-wmt21,
title = "High Frequent In-domain Words Segmentation and Forward Translation for the {WMT}21 Biomedical Task",
author = "Rafieian, Bardia and
      Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the Sixth Conference on Machine Translation",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wmt-1.87",
pages = "863--867",
abstract = "This paper reports the optimization of using the out-of-domain data in the Biomedical translation task. We firstly optimized our parallel training dataset using the BabelNet in-domain terminology words. Afterward, to increase the training set, we studied the effects of the out-of-domain data on biomedical translation tasks, and we created a mixture of in-domain and out-of-domain training sets and added more in-domain data using forward translation in the English-Spanish task. Finally, with a simple bpe optimization method, we increased the number of in-domain sub-words in our mixed training set and trained the Transformer model on the generated data. Results show improvements using our proposed method.",
}
@inproceedings{escolano-etal-2021-wmt21,
title = "The {TALP}-{UPC} Participation in {WMT}21 News Translation Task: an m{BART}-based {NMT} Approach",
author = "Escolano, Carlos and
Tsiamas, Ioannis and
Basta, Christine and
Ferrando, Javier and
      Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the Sixth Conference on Machine Translation",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wmt-1.6",
pages = "117--122",
abstract = "This paper describes the submission to the WMT 2021 news translation shared task by the UPC Machine Translation group. The goal of the task is to translate German to French (De-Fr) and French to German (Fr-De). Our submission focuses on fine-tuning a pre-trained model to take advantage of monolingual data. We fine-tune mBART50 using the filtered data, and additionally, we train a Transformer model on the same data from scratch. In the experiments, we show that fine-tuning mBART50 results in 31.69 BLEU for De-Fr and 23.63 BLEU for Fr-De, which increases 2.71 and 1.90 BLEU accordingly, as compared to the model we train from scratch. Our final submission is an ensemble of these two models, further increasing 0.3 BLEU for Fr-De.",
}
@inproceedings{ferrando-costa-jussa-2021-attention-weights,
title = "Attention Weights in Transformer {NMT} Fail Aligning Words Between Sequences but Largely Explain Model Predictions",
author = "Ferrando, Javier and
Costa-juss{\`a}, Marta R.",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.39",
doi = "10.18653/v1/2021.findings-emnlp.39",
pages = "434--443",
}
@inproceedings{gallego-etal-2021-iwslt21,
title = "End-to-End Speech Translation with Pre-trained Models and Adapters: {UPC} at {IWSLT} 2021",
author = "G{\'a}llego, Gerard I. and
Tsiamas, Ioannis and
Escolano, Carlos and
Fonollosa, Jos{\'e} A. R. and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)",
month = aug,
year = "2021",
address = "Bangkok, Thailand (online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.iwslt-1.11",
doi = "10.18653/v1/2021.iwslt-1.11",
pages = "110--119",
abstract = "This paper describes the submission to the IWSLT 2021 offline speech translation task by the UPC Machine Translation group. The task consists of building a system capable of translating English audio recordings extracted from TED talks into German text. Submitted systems can be either cascade or end-to-end and use a custom or given segmentation. Our submission is an end-to-end speech translation system, which combines pre-trained models (Wav2Vec 2.0 and mBART) with coupling modules between the encoder and decoder, and uses an efficient fine-tuning technique, which trains only 20{\%} of its total parameters. We show that adding an Adapter to the system and pre-training it, can increase the convergence speed and the final result, with which we achieve a BLEU score of 27.3 on the MuST-C test set. Our final model is an ensemble that obtains 28.22 BLEU score on the same set. Our submission also uses a custom segmentation algorithm that employs pre-trained Wav2Vec 2.0 for identifying periods of untranscribable text and can bring improvements of 2.5 to 3 BLEU score on the IWSLT 2019 test set, as compared to the result with the given segmentation.",
}
@article{alastruey2021efficient,
title={Efficient Transformer for Direct Speech Translation},
author={Belen Alastruey and Gerard I. G{\'a}llego and Marta Ruiz Costa-juss{\`a}},
year={2021},
month=jul,
journal={arXiv preprint arXiv:2107.03069},
url={https://arxiv.org/abs/2107.03069}
}
@inproceedings{barrault-etal-2020-findings-first,
title = "Findings of the {F}irst {S}hared {T}ask on {L}ifelong {L}earning {M}achine {T}ranslation",
author = {Barrault, Lo{\"\i}c and
Biesialska, Magdalena and
Costa-juss{\`a}, Marta R. and
Bougares, Fethi and
Galibert, Olivier},
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.2",
pages = "56--64",
abstract = "A lifelong learning system can adapt to new data without forgetting previously acquired knowledge. In this paper, we introduce the first benchmark for lifelong learning machine translation. For this purpose, we provide training, lifelong and test data sets for two language pairs: English-German and English-French. Additionally, we report the results of our baseline systems, which we make available to the public. The goal of this shared task is to encourage research on the emerging topic of lifelong learning machine translation.",
}
@inproceedings{biesialska-etal-2020-continual,
title = "Continual Lifelong Learning in Natural Language Processing: A Survey",
author = "Biesialska, Magdalena and
Biesialska, Katarzyna and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.coling-main.574",
doi = "10.18653/v1/2020.coling-main.574",
pages = "6523--6541",
abstract = "Continual learning (CL) aims to enable information systems to learn from a continuous data stream across time. However, it is difficult for existing deep learning architectures to learn a new task without largely forgetting previously acquired knowledge. Furthermore, CL is particularly challenging for language learning, as natural language is ambiguous: it is discrete, compositional, and its meaning is context-dependent. In this work, we look at the problem of CL through the lens of various NLP tasks. Our survey discusses major challenges in CL and current methods applied in neural network models. We also provide a critical review of the existing CL evaluation methods and datasets in NLP. Finally, we present our outlook on future research directions.",
}
@inproceedings{casas-etal-2020-syntax,
title = "Syntax-driven Iterative Expansion Language Models for Controllable Text Generation",
author = "Casas, Noe and
Fonollosa, Jos{\'e} A. R. and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the Fourth Workshop on Structured Prediction for NLP",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.spnlp-1.1",
doi = "10.18653/v1/2020.spnlp-1.1",
pages = "1--10",
abstract = "The dominant language modeling paradigm handles text as a sequence of discrete tokens. While that approach can capture the latent structure of the text, it is inherently constrained to sequential dynamics for text generation. We propose a new paradigm for introducing a syntactic inductive bias into neural text generation, where the dependency parse tree is used to drive the Transformer model to generate sentences iteratively. Our experiments show that this paradigm is effective at text generation, with quality between LSTMs and Transformers, and comparable diversity, requiring less than half their decoding steps, and its generation process allows direct control over the syntactic constructions of the generated text, enabling the induction of stylistic variations.",
}
@inproceedings{rafieian-costa-jussa-2020-e,
title = "{E}-Commerce Content and Collaborative-based Recommendation using K-Nearest Neighbors and Enriched Weighted Vectors",
author = "Rafieian, Bardia and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of Workshop on Natural Language Processing in E-Commerce",
month = dec,
year = "2020",
address = "Barcelona, Spain",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.ecomnlp-1.1",
pages = "1--10",
abstract = "In this paper, we present two productive and functional recommender methods to improve the ac- curacy of predicting the right product for the user. One proposal is a survey-based recommender system that uses k-nearest neighbors. It recommends products by asking questions from the user, efficiently applying a binary product vector to the product attributes, and processing the request with a minimum error. The second proposal uses an enriched collaborative-based recommender system using enriched weighted vectors. Thanks to the style rules, the enriched collaborative- based method recommends outfits with competitive recommendation quality. We evaluated both of the proposals on a Kaggle fashion-dataset along with iMaterialist and, results show equivalent performance on binary gender and product attributes.",
}
@inproceedings{costa-jussa-de-jorge-2020-fine,
title = "Fine-tuning Neural Machine Translation on Gender-Balanced Datasets",
author = "Costa-juss{\`a}, Marta R. and
de Jorge, Adri{\`a}",
booktitle = "Proceedings of the Second Workshop on Gender Bias in Natural Language Processing",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.gebnlp-1.3",
pages = "26--34",
abstract = "Misrepresentation of certain communities in datasets is causing big disruptions in artificial intelligence applications. In this paper, we propose using an automatically extracted gender-balanced dataset parallel corpus from Wikipedia. This balanced set is used to perform fine-tuning techniques from a bigger model trained on unbalanced datasets to mitigate gender biases in neural machine translation.",
}
@inproceedings{escolano-etal-2020-talp,
title = "The {TALP}-{UPC} System Description for {WMT}20 News Translation Task: Multilingual Adaptation for Low Resource {MT}",
author = "Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.10",
pages = "134--138",
abstract = "In this article, we describe the TALP-UPC participation in the WMT20 news translation shared task for Tamil-English. Given the low amount of parallel training data, we resort to adapt the task to a multilingual system to benefit from the positive transfer from high resource languages. We use iterative backtranslation to fine-tune the system and benefit from the monolingual data available. In order to measure the effectivity of such methods, we compare our results to a bilingual baseline system.",
}
@inproceedings{verges-boncompte-r-costa-jussa-2020-multilingual,
title = "Multilingual Neural Machine Translation: Case-study for {C}atalan, {S}panish and {P}ortuguese {R}omance Languages",
author = "Verg{\'e}s Boncompte, Pere and
R. Costa-juss{\`a}, Marta",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.54",
pages = "447--450",
abstract = "In this paper, we describe the TALP-UPC participation in the WMT Similar Language Translation task between Catalan, Spanish, and Portuguese, all of them, Romance languages. We made use of different techniques to improve the translation between these languages. The multilingual shared encoder/decoder has been used for all of them. Additionally, we applied back-translation to take advantage of the monolingual data. Finally, we have applied fine-tuning to improve the in-domain data. Each of these techniques brings improvements over the previous one. In the official evaluation, our system was ranked 1st in the Portuguese-to-Spanish direction, 2nd in the opposite direction, and 3rd in the Catalan-Spanish pair.",
}
@inproceedings{menendez-salazar-etal-2020-ipn,
title = "The {IPN}-{CIC} team system submission for the {WMT} 2020 similar language task",
author = "Men{\'e}ndez-Salazar, Luis A. and
Sidorov, Grigori and
Costa-Juss{\`a}, Marta R.",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.47",
pages = "409--413",
abstract = "This paper describes the participation of the NLP research team of the IPN Computer Research center in the WMT 2020 Similar Language Translation Task. We have submitted systems for the Spanish-Portuguese language pair (in both directions). The three submitted systems are based on the Transformer architecture and used fine tuning for domain Adaptation.",
}
@inproceedings{barrault-etal-2020-findings,
title = "Findings of the 2020 Conference on Machine Translation ({WMT}20)",
author = {Barrault, Lo{\"\i}c and Biesialska, Magdalena and Bojar, Ond{\v{r}}ej and Costa-juss{\`a}, Marta R. and Federmann, Christian and Graham, Yvette and Grundkiewicz, Roman and Haddow, Barry and Huck, Matthias and Joanis, Eric and Kocmi, Tom and Koehn, Philipp and Lo, Chi-kiu and Ljube{\v{s}}i{\'c}, Nikola and Monz, Christof and Morishita, Makoto and Nagata, Masaaki and Nakazawa, Toshiaki and Pal, Santanu and Post, Matt and Zampieri, Marcos},
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.1",
pages = "1--55",
abstract = "This paper presents the results of the news translation task and the similar language translation task, both organised alongside the Conference on Machine Translation (WMT) 2020. In the news task, participants were asked to build machine translation systems for any of 11 language pairs, to be evaluated on test sets consisting mainly of news stories. The task was also opened up to additional test suites to probe specific aspects of translation. In the similar language translation task, participants built machine translation systems for translating between closely related pairs of languages.",
}
@inproceedings{wmt-2020-machine,
title = "Proceedings of the Fifth Conference on Machine Translation",
author = {Barrault, Lo{\"\i}c and Bojar, Ond{\v{r}}ej and Bougares, Fethi and Chatterjee, Rajen and Costa-juss{\`a}, Marta R. and Federmann, Christian and Fishel, Mark and Fraser, Alexander and Graham, Yvette and Guzman, Paco and Haddow, Barry and Huck, Matthias and Yepes, Antonio Jimeno and Koehn, Philipp and Martins, Andr{\'e} and Morishita, Makoto and Monz, Christof and Nagata, Masaaki and Nakazawa, Toshiaki and Negri, Matteo},
month = nov,
year = "2020",
address = "Online",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.wmt-1.0",
}
@inproceedings{gebnlp-2020-gender,
title = "Proceedings of the Second Workshop on Gender Bias in Natural Language Processing",
author = "Costa-juss{\`a}, Marta R. and
Hardmeier, Christian and
Radford, Will and
Webster, Kellie",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
booktitle= "Proceedings of the Second Workshop on Gender Bias in Natural Language Processing",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.gebnlp-1.0",
}
@article{costa2020amaleu,
title={AMALEU: Una Representaci{\'o}n Universal del Lenguaje basada en Aprendizaje Autom{\'a}tico},
author={Costa-Juss{\`a}, Marta Ruiz},
journal={Procesamiento del lenguaje natural},
number={65},
pages={105--108},
year={2020},
publisher={Sociedad Espa{\~n}ola para el Procesamiento del Lenguaje Natural}
}
@article{doi:10.1002/asi.24395,
author = {Escolano, Carlos and Costa-Juss{\`a}, Marta Ruiz and Fonollosa, Jos{\'e} A. R.},
title = {From bilingual to multilingual neural-based machine translation by incremental training},
journal = {Journal of the Association for Information Science and Technology},
year = {2020},
pages = {},
doi = {10.1002/asi.24395},
url = {https://asistdl.onlinelibrary.wiley.com/doi/abs/10.1002/asi.24395},
eprint = {https://asistdl.onlinelibrary.wiley.com/doi/pdf/10.1002/asi.24395},
  abstract = {A common intermediate language representation in neural machine translation can be used to extend bilingual systems by incremental training. We propose a new architecture based on introducing an interlingual loss as an additional training objective. By adding and forcing this interlingual loss, we can train multiple encoders and decoders for each language, sharing among them a common intermediate representation. Translation results on the low-resource tasks (Turkish-English and Kazakh-English tasks) show a BLEU improvement of up to 2.8 points. However, results on a larger dataset (Russian-English and Kazakh-English) show BLEU losses of a similar amount. While our system provides improvements only for the low-resource tasks in terms of translation quality, our system is capable of quickly deploying new language pairs without the need to retrain the rest of the system, which may be a game changer in some situations. Specifically, what is most relevant regarding our architecture is that it is capable of: reducing the number of production systems, with respect to the number of languages, from quadratic to linear; incrementally adding a new language to the system without retraining the languages already there; and allowing for translations from the new language to all the others present in the system.}
}
@article{Basta2020,
doi = {10.1007/s00521-020-05211-z},
url = {https://doi.org/10.1007/s00521-020-05211-z},
year = {2020},
month = jul,
publisher = {Springer Science and Business Media {LLC}},
author = {Christine Basta and Marta R. Costa-juss{\`{a}} and Noe Casas},
title = {Extensive study on the underlying gender bias in contextualized word embeddings},
journal = {Neural Computing and Applications}
}
@inproceedings{basta-etal-2020-towards,
title = "Towards Mitigating Gender Bias in a decoder-based Neural Machine Translation model by Adding Contextual Information",
author = "Basta, Christine and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the The Fourth Widening Natural Language Processing Workshop",
month = jul,
year = "2020",
address = "Seattle, USA",
publisher = "Association for Computational Linguistics",
doi = "10.18653/v1/2020.winlp-1.25",
pages = "99--102",
abstract = "Gender bias negatively impacts many natural language processing applications, including machine translation (MT). The motivation behind this work is to study whether recent proposed MT techniques are significantly contributing to attenuate biases in document-level and gender-balanced data. For the study, we consider approaches of adding the previous sentence and the speaker information, implemented in a decoder-based neural MT system. We show improvements both in translation quality (+1 BLEU point) as well as in gender bias mitigation on WinoMT (+5{\%} accuracy).",
}
@article{escolano2020training,
title={Training Multilingual Machine Translation by Alternately Freezing Language-Specific Encoders-Decoders},
  author={Escolano, Carlos and Costa-juss{\`a}, Marta R. and Fonollosa, Jos{\'e} A. R. and Artetxe, Mikel},
journal={arXiv preprint arXiv:2006.01594},
year={2020},
url={https://arxiv.org/abs/2006.01594}
}
@inproceedings{carrino-etal-2019-terminology,
title = "Terminology-Aware Segmentation and Domain Feature for the {WMT}19 Biomedical Translation Task",
author = "Carrino, Casimiro Pio and
Rafieian, Bardia and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/W19-5418",
doi = "10.18653/v1/W19-5418",
pages = "151--155",
abstract = "In this work, we give a description of the TALP-UPC systems submitted for the WMT19 Biomedical Translation Task. Our proposed strategy is NMT model-independent and relies only on one ingredient, a biomedical terminology list. We first extracted such a terminology list by labelling biomedical words in our training dataset using the BabelNet API. Then, we designed a data preparation strategy to insert the terms information at a token level. Finally, we trained the Transformer model with this terms-informed data. Our best-submitted system ranked 2nd and 3rd for Spanish-English and English-Spanish translation directions, respectively.",
}
@inproceedings{carrino-etal-2020-automatic,
title = "Automatic {S}panish Translation of {SQ}u{AD} Dataset for Multi-lingual Question Answering",
author = "Carrino, Casimiro Pio and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://www.aclweb.org/anthology/2020.lrec-1.677",
pages = "5515--5523",
abstract = "Recently, multilingual question answering became a crucial research topic, and it is receiving increased interest in the NLP community. However, the unavailability of large-scale datasets makes it challenging to train multilingual QA systems with performance comparable to the English ones. In this work, we develop the Translate Align Retrieve (TAR) method to automatically translate the Stanford Question Answering Dataset (SQuAD) v1.1 to Spanish. We then used this dataset to train Spanish QA systems by fine-tuning a Multilingual-BERT model. Finally, we evaluated our QA models with the recently proposed MLQA and XQuAD benchmarks for cross-lingual Extractive QA. Experimental results show that our models outperform the previous Multilingual-BERT baselines achieving the new state-of-the-art values of 68.1 F1 on the Spanish MLQA corpus and 77.6 F1 on the Spanish XQuAD corpus. The resulting, synthetically generated SQuAD-es v1.1 corpora, with almost 100{\%} of data contained in the original English version, to the best of our knowledge, is the first large-scale QA training resource for Spanish.",
language = "English",
ISBN = "979-10-95546-34-4",
}
@inproceedings{basta2019evaluating,
title={Evaluating the Underlying Gender Bias in Contextualized Word Embeddings},
  author={Basta, Christine and Costa-juss{\`a}, Marta R. and Casas, Noe},
booktitle={Proceedings of the First Workshop on Gender Bias in Natural Language Processing},
pages={33--39},
year={2019},
month = aug
}
@inproceedings{casas2018differentiable,
title={A Differentiable {BLEU} Loss. Analysis and First Results},
author={Casas, Noe and Fonollosa, Jos{\'e} A. R. and Costa-juss{\`a}, Marta R.},
booktitle={6th International Conference on Learning Representations (ICLR 2018), Workshop Track},
year={2018},
}
@inproceedings{casas-etal-2018-talp,
title = "The {TALP}-{UPC} Machine Translation Systems for {WMT}18 News Shared Translation Task",
author = "Casas, Noe and
Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",
month = oct,
year = "2018",
address = "Belgium, Brussels",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/W18-6406",
doi = "10.18653/v1/W18-6406",
pages = "355--360",
abstract = "In this article we describe the TALP-UPC research group participation in the WMT18 news shared translation task for Finnish-English and Estonian-English within the multi-lingual subtrack. All of our primary submissions implement an attention-based Neural Machine Translation architecture. Given that Finnish and Estonian belong to the same language family and are similar, we use as training data the combination of the datasets of both language pairs to paliate the data scarceness of each individual pair. We also report the translation quality of systems trained on individual language pair data to serve as baseline and comparison reference.",
}
@inproceedings{torregrosa-etal-2019-leveraging,
title = "Leveraging Rule-Based Machine Translation Knowledge for Under-Resourced Neural Machine Translation Models",
author = "Torregrosa, Daniel and
Pasricha, Nivranshu and
Masoud, Maraim and
Chakravarthi, Bharathi Raja and
Alonso, Juan and
Casas, Noe and
Arcan, Mihael",
booktitle = "Proceedings of Machine Translation Summit XVII Volume 2: Translator, Project and User Tracks",
month = aug,
year = "2019",
address = "Dublin, Ireland",
publisher = "European Association for Machine Translation",
url = "https://www.aclweb.org/anthology/W19-6725",
pages = "125--133",
}
@inproceedings{casas-etal-2019-talp,
title = "The {TALP}-{UPC} Machine Translation Systems for {WMT}19 News Translation Task: Pivoting Techniques for Low Resource {MT}",
author = "Casas, Noe and
Fonollosa, Jos{\'e} A. R. and
Escolano, Carlos and
Basta, Christine and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/W19-5311",
doi = "10.18653/v1/W19-5311",
pages = "155--162",
abstract = "In this article, we describe the TALP-UPC research group participation in the WMT19 news translation shared task for Kazakh-English. Given the low amount of parallel training data, we resort to using Russian as pivot language, training subword-based statistical translation systems for Russian-Kazakh and Russian-English that were then used to create two synthetic pseudo-parallel corpora for Kazakh-English and English-Kazakh respectively. Finally, a self-attention model based on the decoder part of the Transformer architecture was trained on the two pseudo-parallel corpora."
}
@article{artetxe2020all,
title={Do all Roads Lead to Rome? Understanding the Role of Initialization in Iterative Back-Translation},
author={Artetxe, Mikel and Labaka, Gorka and Casas, Noe and Agirre, Eneko},
journal={arXiv preprint arXiv:2002.12867},
year={2020},
url="https://arxiv.org/abs/2002.12867"
}
@inproceedings{casas-etal-2020-combining,
title = "Combining Subword Representations into Word-level Representations in the Transformer Architecture",
author = "Casas, Noe and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.acl-srw.10",
doi = "10.18653/v1/2020.acl-srw.10",
pages = "66--71",
abstract = "In Neural Machine Translation, using word-level tokens leads to degradation in translation quality. The dominant approaches use subword-level tokens, but this increases the length of the sequences and makes it difficult to profit from word-level information such as POS tags or semantic dependencies. We propose a modification to the Transformer model to combine subword-level representations into word-level ones in the first layers of the encoder, reducing the effective length of the sequences in the following layers and providing a natural point to incorporate extra word-level information. Our experiments show that this approach maintains the translation quality with respect to the normal Transformer model when no extra word-level information is injected and that it is superior to the currently dominant method for incorporating word-level source language information to models based on subword-level vocabularies.",
}
@inproceedings{biesialska-etal-2019-talp,
title = "The {TALP}-{UPC} System for the {WMT} Similar Language Task: Statistical vs Neural Machine Translation",
author = "Biesialska, Magdalena and
Guardia, Lluis and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/W19-5424",
doi = "10.18653/v1/W19-5424",
pages = "185--191",
abstract = "Although the problem of similar language translation has been an area of research interest for many years, yet it is still far from being solved. In this paper, we study the performance of two popular approaches: statistical and neural. We conclude that both methods yield similar results; however, the performance varies depending on the language pair. While the statistical approach outperforms the neural one by a difference of 6 BLEU points for the Spanish-Portuguese language pair, the proposed neural model surpasses the statistical one by a difference of 2 BLEU points for Czech-Polish. In the former case, the language similarity (based on perplexity) is much higher than in the latter case. Additionally, we report negative results for the system combination with back-translation. Our TALP-UPC system submission won 1st place for Czech-{\textgreater}Polish and 2nd place for Spanish-{\textgreater}Portuguese in the official evaluation of the 1st WMT Similar Language Translation task.",
}
@article{biesialska-2020-refinement,
place = {NL},
title = {Refinement of Unsupervised Cross-Lingual Word Embeddings},
volume = {325},
ISSN = {0922-6389},
url = {https://doi.org/10.3233/FAIA200317},
DOI = {10.3233/FAIA200317},
number = {ECAI 2020},
journal = {Frontiers in Artificial Intelligence and Applications},
publisher = {IOS Press},
author = {Biesialska, Magdalena and
Costa-juss{\`a}, Marta R.},
year = {2020},
pages = {1978--1981}
}
@inproceedings{biesialska-etal-2020-enhancing,
title = "Enhancing Word Embeddings with Knowledge Extracted from Lexical Resources",
author = "Biesialska, Magdalena and
Rafieian, Bardia and
Costa-juss{\`a}, Marta R.",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2020.acl-srw.36",
doi = "10.18653/v1/2020.acl-srw.36",
pages = "271--278",
abstract = "In this work, we present an effective method for semantic specialization of word vector representations. To this end, we use traditional word embeddings and apply specialization methods to better capture semantic relations between words. In our approach, we leverage external knowledge from rich lexical resources such as BabelNet. We also show that our proposed post-specialization method based on an adversarial neural network with the Wasserstein distance allows to gain improvements over state-of-the-art methods on two tasks: word similarity and dialog state tracking.",
}
@inproceedings{escolano-etal-2017-talp,
title = "The {TALP}-{UPC} Neural Machine Translation System for {G}erman/{F}innish-{E}nglish Using the Inverse Direction Model in Rescoring",
author = "Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the Second Conference on Machine Translation",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/W17-4725",
doi = "10.18653/v1/W17-4725",
pages = "283--287",
}
@inproceedings{vila2018end,
title={End-to-End Speech Translation with the Transformer},
author={Vila, Laura Cross and Escolano, Carlos and Fonollosa, Jos{\'e} A. R. and Costa-juss{\`a}, Marta R.},
booktitle={IberSPEECH},
pages={60--63},
year={2018}
}
@inproceedings{escolano-etal-2019-bilingual,
title = "From Bilingual to Multilingual Neural Machine Translation by Incremental Training",
author = "Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R.",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P19-2033",
doi = "10.18653/v1/P19-2033",
pages = "236--242",
abstract = "Multilingual Neural Machine Translation approaches are based on the use of task specific models and the addition of one more language can only be done by retraining the whole system. In this work, we propose a new training schedule that allows the system to scale to more languages without modification of the previous components based on joint training and language-independent encoder/decoder modules allowing for zero-shot translation. This work in progress shows close results to state-of-the-art in the WMT task.",
}
@inproceedings{escolano-etal-2019-multilingual,
title = "Multilingual, Multi-scale and Multi-layer Visualization of Intermediate Representations",
author = "Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Lacroux, Elora and
V{\'a}zquez, Pere-Pau",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D19-3026",
doi = "10.18653/v1/D19-3026",
pages = "151--156",
abstract = "The main alternatives nowadays to deal with sequences are Recurrent Neural Networks (RNN) architectures and the Transformer. In this context, Both RNN{'}s and Transformer have been used as an encoder-decoder architecture with multiple layers in each module. Far beyond this, these architectures are the basis for the contextual word embeddings which are revolutionizing most natural language downstream applications. However, intermediate representations in either the RNN or Transformer architectures can be difficult to interpret. To make these layer representations more accessible and meaningful, we introduce a web-based tool that visualizes them both at the sentence and token level. We present three use cases. The first analyses gender issues in contextual word embeddings. The second and third are showing multilingual intermediate representations for sentences and tokens and the evolution of these intermediate representations along with the multiple layers of the decoder and in the context of multilingual machine translation.",
}
@inproceedings{escolano-etal-2021-multilingual,
title = "Multilingual Machine Translation: Closing the Gap between Shared and Language-specific Encoder-Decoders",
author = "Escolano, Carlos and
Costa-juss{\`a}, Marta R. and
Fonollosa, Jos{\'e} A. R. and
Artetxe, Mikel",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/2021.eacl-main.80",
pages = "944--948",
abstract = "State-of-the-art multilingual machine translation relies on a universal encoder-decoder, which requires retraining the entire system to add new languages. In this paper, we propose an alternative approach that is based on language-specific encoder-decoders, and can thus be more easily extended to new languages by learning their corresponding modules. So as to encourage a common interlingua representation, we simultaneously train the N initial languages. Our experiments show that the proposed approach outperforms the universal encoder-decoder by 3.28 BLEU points on average, while allowing to add new languages without the need to retrain the rest of the modules. All in all, our work closes the gap between shared and language-specific encoderdecoders, advancing toward modular multilingual machine translation systems that can be flexibly extended in lifelong learning settings.",
}
@article{escolano2017generacion,
title={Generaci{\'o}n morfol{\'o}gica con algoritmos de aprendizaje profundo integrada en un sistema de traducci{\'o}n autom{\'a}tica estad{\'\i}stica},
author={Escolano, Carlos and Costa-juss{\`a}, Marta R.},
journal={Procesamiento del lenguaje natural (SEPLN)},
number={59},
pages={107--114},
year={2017}
}
@article{marino2006n,
title={N-gram-based machine translation},
author={Mari{\~n}o, Jos{\'e} B. and Banchs, Rafael E. and Crego, Josep M. and de Gispert, Adri{\`a} and Lambert, Patrik and Fonollosa, Jos{\'e} A. R. and Costa-juss{\`a}, Marta R.},
journal={Computational Linguistics},
volume={32},
number={4},
pages={527--549},
year={2006},
publisher={MIT Press}
}
@inproceedings{costa-jussa-fonollosa-2016-character,
title = "Character-based Neural Machine Translation",