Chowdhury S., Stepanov A. E. and Riccardi G. Transfer of Corpus-Specific Dialogue Act Annotation to ISO Standard: Is it worth it ? (Proceeding) Proc. Language Resources and Evaluation Conference, Portorož, 2016, 2016. (Abstract | Links | BibTeX | Tags: Statistical Machine Translation) Chowdhury A., Calvo M., Ghosh A., Stepanov A. E., Bayer A. O., Riccardi G., Garcia F. and Sanchis E. Selection and Aggregation Techniques for Crowdsourced Semantic Annotation Task (Conference) 2015. (Abstract | Links | BibTeX | Tags: Machine Learning, Signal Annotation and Interpretation, Statistical Machine Translation) Stepanov E., Riccardi G. and Bayer A. O. The Development of the Multilingual LUNA Corpus for Spoken Language System Porting (Conference) 2014. (Abstract | Links | BibTeX | Tags: Natural Language Processing, Speech Processing, Statistical Machine Translation) Stepanov E., Kashkarev I., Bayer A. O., Riccardi G. and Ghosh A. Language Style and Domain Adaptation for Cross-Language Porting (Conference) 2013. (Abstract | Links | BibTeX | Tags: Signal Annotation and Interpretation, Statistical Machine Translation) Garcia F., Hurtado L. F., Segarra E., Sanchis E. and Riccardi G. Combining Machine Translation Systems for Spoken Language Understanding Portability (Conference) 2012. (Abstract | Links | BibTeX | Tags: Machine Learning, Signal Annotation and Interpretation, Speech Processing, Statistical Machine Translation) Bangalore S. and Riccardi G. Stochastic Finite-State Models for Spoken Language Machine Translation (Article) Machine Translation, vol 17, n. 3, pp. 165-184, 2002 (Invited paper), 2002. (Abstract | Links | BibTeX | Tags: Statistical Machine Translation) Bangalore S., Murdock V. and Riccardi G. Bootstrapping Bilingual Data Using Consensus Translation for a Multilingual Instant Messaging System (Conference) 2002. (BibTeX | Tags: Statistical Machine Translation) Bangalore S., Bordel G. and Riccardi G. 
Computing Consensus Translation from Multiple Machine Translation Systems (Conference) 2002. (BibTeX | Tags: Statistical Machine Translation) Bangalore S. and Riccardi G. A Finite-State Approach to Machine Translation (Conference) 2001. (BibTeX | Tags: Statistical Machine Translation) Bangalore S. and Riccardi G. Finite-state models for lexical reordering in spoken language translation (Conference) 2000. (BibTeX | Tags: Statistical Machine Translation) Bangalore S. and Riccardi G. Stochastic finite-state models for spoken language machine translation (Conference) Proc. Workshop on Embedded Machine Translation Systems, 2000. (BibTeX | Tags: Statistical Machine Translation)
2016
title = {Transfer of Corpus-Specific Dialogue Act Annotation to ISO Standard: Is it worth it ?},
author = {Chowdhury S., Stepanov A. E. and Riccardi G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2016/11/LREC16-DA-StandardISO.pdf},
year = {2016},
date = {2016-11-01},
publisher = {Proc. Language Resources and Evaluation Conference, Portorož, 2016},
abstract = {Spoken conversation corpora often adapt existing Dialogue Act (DA) annotation specifications, such as DAMSL, DIT++, etc., to task specific needs, yielding incompatible annotations; thus, limiting corpora re-usability. Recently accepted ISO standard for DA annotation – Dialogue Act Markup Language (DiAML) – is designed as domain and application independent. Moreover, the clear separation of dialogue dimensions and communicative functions, coupled with the hierarchical organization of the latter, allows for classification at different levels of granularity. However, re-annotating existing corpora with the new scheme might require significant effort. In this paper we test the utility of the ISO standard through comparative evaluation of the corpus-specific legacy and the semi-automatically transferred DiAML DA annotations on supervised dialogue act classification task. To test the domain independence of the resulting annotations, we perform cross-domain and data aggregation evaluation. Compared to the legacy annotation scheme, on the Italian LUNA Human-Human corpus, the DiAML annotation scheme exhibits better cross-domain and data aggregation classification performance, while maintaining comparable in-domain performance.},
keywords = {Statistical Machine Translation}
}
2015
title = {Selection and Aggregation Techniques for Crowdsourced Semantic Annotation Task},
author = {Chowdhury A., Calvo M., Ghosh A., Stepanov A. E., Bayer A. O., Riccardi G., Garcia F. and Sanchis E.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2015/11/IS15-crowdsourcingSelectionAggregation.pdf},
year = {2015},
date = {2015-09-06},
journal = {Proc. INTERSPEECH , Dresden, 2015},
abstract = {Crowdsourcing is an accessible and cost-effective alternative to traditional methods of collecting and annotating data. The application of crowdsourcing to simple tasks has been well investigated.
However, complex tasks like semantic annotation transfer require workers to take simultaneous decisions on chunk segmentation and labeling while acquiring on-the-go domain-specific knowledge. The increased task complexity may generate low judgment agreement and/or poor performance. The goal of this paper is to cope with these crowdsourcing requirements with semantic priming and unsupervised quality control mechanisms. We aim at an automatic quality control that takes into account different levels of workers’ expertise and annotation task performance. We investigate the judgment selection and aggregation techniques on the task of cross-language semantic annotation transfer. We propose stochastic modeling techniques to estimate the task performance of a worker on a particular judgment with respect to the whole worker group. These estimates are used for the selection of the best judgments as well as weighted consensus-based annotation aggregation. We demonstrate that the technique is useful for increasing the quality of collected annotations.
Index Terms: Crowdsourcing, Annotation, Cross-language porting},
keywords = {Machine Learning, Signal Annotation and Interpretation, Statistical Machine Translation}
}
However, complex tasks like semantic annotation transfer require workers to take simultaneous decisions on chunk segmentation and labeling while acquiring on-the-go domain-specific knowledge. The increased task complexity may generate low judgment agreement and/or poor performance. The goal of this paper is to cope with these crowdsourcing requirements with semantic priming and unsupervised quality control mechanisms. We aim at an automatic quality control that takes into account different levels of workers’ expertise and annotation task performance. We investigate the judgment selection and aggregation techniques on the task of cross-language semantic annotation transfer. We propose stochastic modeling techniques to estimate the task performance of a worker on a particular judgment with respect to the whole worker group. These estimates are used for the selection of the best judgments as well as weighted consensus-based annotation aggregation. We demonstrate that the technique is useful for increasing the quality of collected annotations.
Index Terms: Crowdsourcing, Annotation, Cross-language porting
2014
title = {The Development of the Multilingual LUNA Corpus for Spoken Language System Porting},
author = {Stepanov E., Riccardi G. and Bayer A. O.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/LREC14-MultilingualLUNACorpusPorting.pdf},
year = {2014},
date = {2014-01-01},
journal = {LREC , Reykjavik, 2014},
abstract = {The development of annotated corpora is a critical process in the development of speech applications for multiple target languages. While the technology to develop a monolingual speech application has reached satisfactory results (in terms of performance and effort), porting an existing application from a source language to a target language is still a very expensive task. In this paper we address the problem of creating multilingual aligned corpora and its evaluation in the context of a spoken language understanding (SLU) porting task. We discuss the challenges of the manual creation of multilingual corpora, as well as present the algorithms for the creation of multilingual SLU via Statistical Machine Translation (SMT).},
keywords = {Natural Language Processing, Speech Processing, Statistical Machine Translation}
}
2013
title = {Language Style and Domain Adaptation for Cross-Language Porting},
author = {Stepanov E., Kashkarev I., Bayer A. O., Riccardi G. and Ghosh A.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/ASRU13-LangAdaptCrossPorting.pdf},
year = {2013},
date = {2013-01-01},
journal = {IEEE Workshop on Automatic Speech Recognition and Understanding, Olomouc, 2013},
abstract = {Automatic cross-language Spoken Language Understanding porting is plagued by two limitations. First, SLU are usually trained on limited domain corpora. Second, language pair resources (e.g. aligned corpora) are scarce or unmatched in style (e.g. news vs. conversation). We present experiments on automatic style adaptation of the input for the translation systems and their output for SLU. We approach the problem of scarce aligned data by adapting the available parallel data to the target domain using limited in-domain and larger web crawled close-to-domain corpora. SLU performance is optimized by re-ranking its output with Recurrent Neural Network-based joint language model. We evaluate end-to-end SLU porting on close and distant language pairs: Spanish - Italian and Turkish - Italian; and achieve significant improvements both in translation quality and SLU performance.},
keywords = {Signal Annotation and Interpretation, Statistical Machine Translation}
}
2012
title = {Combining Machine Translation Systems for Spoken Language Understanding Portability},
author = {Garcia F., Hurtado L. F., Segarra E., Sanchis E. and Riccardi G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/SLT12-MTPortSLU.pdf},
year = {2012},
date = {2012-01-01},
journal = {IEEE/ACL Workshop on Spoken Language Technology, Miami, 2012},
abstract = {We are interested in the problem of learning Spoken Language Understanding (SLU) models for multiple target languages. Learning such models requires annotated corpora, and porting to different languages would require corpora with parallel text translation and semantic annotations. In this paper we investigate how to learn a SLU model in a target language starting from no target text and no semantic annotation. Our proposed algorithm is based on the idea of exploiting the diversity (with regard to performance and coverage) of multiple translation systems to transfer statistically stable word-to-concept mappings in the case of the romance language pair, French and Spanish. Each translation system performs differently at the lexical level (wrt BLEU). The best translation system performances for the semantic task are gained from their combination at different stages of the portability methodology. We have evaluated the portability algorithms on the French MEDIA corpus, using French as the source language and Spanish as the target language. The experiments show the effectiveness of the proposed methods with respect to the source language SLU baseline.},
keywords = {Machine Learning, Signal Annotation and Interpretation, Speech Processing, Statistical Machine Translation}
}
2002
title = {Stochastic Finite-State Models for Spoken Language Machine Translation},
author = {Bangalore S. and Riccardi G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/mt_journal_special-02.pdf},
year = {2002},
date = {2002-01-01},
journal = {Machine Translation , vol 17, n. 3, pp. 165-184, 2002 (Invited paper)},
abstract = {The problem of machine translation can be viewed as consisting of two subproblems (a) lexical selection and (b) lexical reordering. In this paper, we propose stochastic finite-state models for these two subproblems. Stochastic finite-state models are efficiently learnable from data, effective for decoding and are associated with a calculus for composing models which allows for tight integration of constraints from various levels of language processing. We present a method for learning stochastic finite-state models for lexical selection and lexical reordering that are trained automatically from pairs of source and target utterances. We use this method to develop models for English–Japanese and English–Spanish translation and present the performance of these models for translation on speech and text. We also evaluate the efficacy of such a translation model in the context of a call routing task of unconstrained speech utterances.},
keywords = {Statistical Machine Translation}
}
title = {Bootstrapping Bilingual Data Using Consensus Translation for a Multilingual Instant Messaging System},
author = {Bangalore S., Murdock V. and Riccardi G.},
year = {2002},
date = {2002-01-01},
journal = {COLING, Taipei, 2002},
keywords = {Statistical Machine Translation}
}
title = {Computing Consensus Translation from Multiple Machine Translation Systems},
author = {Bangalore S., Bordel G. and Riccardi G.},
year = {2002},
date = {2002-01-01},
journal = {Proc. IEEE ASRU, Madonna di Campiglio, Italy, 2001},
keywords = {Statistical Machine Translation}
}
2001
title = {A Finite-State Approach to Machine Translation},
author = {Bangalore S. and Riccardi G.},
year = {2001},
date = {2001-01-01},
journal = {Proc. NAACL Conference, Pittsburgh, June, 2001},
keywords = {Statistical Machine Translation}
}
2000
title = {Finite-state models for lexical reordering in spoken language translation},
author = {Bangalore S. and Riccardi G.},
year = {2000},
date = {2000-10-01},
journal = {Proc. ICSLP, Beijing, Oct. 2000},
keywords = {Statistical Machine Translation}
}
title = {Stochastic finite-state models for spoken language machine translation},
author = {Bangalore S. and Riccardi G.},
year = {2000},
date = {2000-05-01},
journal = {Proc. Workshop on Embedded Machine Translation Systems, NAACL, pp. 52-59, Seattle, May 2000},
keywords = {Statistical Machine Translation}
}