2015
@conference{bayer2015deep,
title = {Deep Semantic Encodings for Language Modeling},
author = {Bayer A. O. and Riccardi G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2015/11/IS15-SELMAutoEncoding.pdf},
year = {2015},
date = {2015-09-06},
journal = {Proc. INTERSPEECH, Dresden, 2015},
abstract = {Word error rate (WER) is not an appropriate metric for spoken language systems (SLS) because lower WER does not necessarily yield better understanding performance. Therefore, language models (LMs) that are used in SLS should be trained to jointly optimize transcription and understanding performance. Semantic LMs (SELMs) are based on the theory of frame semantics and incorporate features of frames and meaning bearing words (target words) as semantic context when training LMs. The performance of SELMs is affected by the errors on the ASR and the semantic parser output. In this paper we address the problem of coping with such noise in the training phase of the neural network-based architecture of LMs. We propose the use of deep autoencoders for the encoding of semantic context while accounting for ASR errors. We investigate the optimization of SELMs both for transcription and understanding by using deep semantic encodings. Deep semantic encodings suppress the noise introduced by the ASR module, and enable SELMs to be optimized adequately. We assess the understanding performance by measuring the errors made on target words and we achieve 3.7% relative improvement over recurrent neural network LMs. Index Terms: Language Modeling, Semantic Language Models, Recurrent Neural Networks, Deep Autoencoders},
keywords = {Language Modeling, Signal Annotation and Interpretation, Speech Processing}
}
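The deep semantic encoding idea above lends itself to a compact illustration. Below is a minimal, hypothetical sketch (not the authors' code) of a denoising autoencoder over bag-of-frames vectors: the input is corrupted to mimic ASR/parser noise while the reconstruction target stays clean, so the learned code tends to suppress the noise. Layer sizes, the corruption rate, and all names are illustrative assumptions.

import torch
import torch.nn as nn

class SemanticAutoencoder(nn.Module):
    def __init__(self, n_frames, code_dim=128):
        super().__init__()
        # Encoder: (possibly ASR-corrupted) frame vector -> dense semantic code.
        self.encoder = nn.Sequential(
            nn.Linear(n_frames, 512), nn.ReLU(),
            nn.Linear(512, code_dim), nn.Tanh(),
        )
        # Decoder: reconstruct the clean frame vector from the code.
        self.decoder = nn.Sequential(
            nn.Linear(code_dim, 512), nn.ReLU(),
            nn.Linear(512, n_frames),
        )

    def forward(self, x):
        code = self.encoder(x)
        return self.decoder(code), code

def train_step(model, optimizer, clean, drop_p=0.2):
    # Denoising objective: randomly drop input features (a stand-in for
    # ASR/parser errors) but reconstruct the clean target.
    noisy = clean * (torch.rand_like(clean) > drop_p).float()
    logits, _ = model(noisy)
    loss = nn.functional.binary_cross_entropy_with_logits(logits, clean)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()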
2014
@conference{bayer2014semantic,
title = {Semantic Language Models for Automatic Speech Recognition},
author = {Bayer A. O. and Riccardi G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/SLT14-SemanticSLM.pdf},
year = {2014},
date = {2014-10-01},
journal = {IEEE/ACL Workshop on Spoken Language Technology, Lake Tahoe, 2014},
abstract = {We are interested in the problem of semantics-aware training of language models (LMs) for Automatic Speech Recognition (ASR). Traditional language modeling research has ignored semantic constraints and focused on limited size histories of words. Semantic structures may provide information to capture lexically realized long-range dependencies as well as the linguistic scene of a speech utterance. In this paper, we present a novel semantic LM (SELM) that is based on the theory of frame semantics. Frame semantics analyzes the meaning of words by considering their role in the semantic frames they occur in and by considering their syntactic properties. We show that by integrating semantic frames and target words into recurrent neural network LMs we can gain significant improvements in perplexity and word error rates. We have evaluated the semantic LM on the publicly available ASR baselines on the Wall Street Journal (WSJ) corpus. SELMs achieve 50% and 64% relative reduction in perplexity compared to n-gram models by using frames and target words respectively. In addition, 12% and 7% relative improvements in word error rates are achieved by SELMs on the Nov’92 and Nov’93 test sets, respectively.},
keywords = {Language Modeling, Natural Language Processing, Speech Processing}
}
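As an illustration of the frame-conditioned recurrent LM described in this abstract, here is a hypothetical sketch (not the authors' model): each input token is paired with the index of the semantic frame it evokes, and the two embeddings are concatenated before the recurrence. Vocabulary sizes and dimensions are placeholder assumptions.

import torch
import torch.nn as nn

class FrameConditionedLM(nn.Module):
    def __init__(self, n_words, n_frames, d_word=256, d_frame=64, d_hidden=512):
        super().__init__()
        self.word_emb = nn.Embedding(n_words, d_word)
        self.frame_emb = nn.Embedding(n_frames, d_frame)
        # The recurrent state sees both the word and its semantic frame.
        self.rnn = nn.LSTM(d_word + d_frame, d_hidden, batch_first=True)
        self.out = nn.Linear(d_hidden, n_words)

    def forward(self, words, frames):
        # words, frames: (batch, time) index tensors, aligned per token.
        x = torch.cat([self.word_emb(words), self.frame_emb(frames)], dim=-1)
        h, _ = self.rnn(x)
        return self.out(h)  # next-word logits at every position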
2006
@article{hakkanitur2006beyond,
title = {Beyond ASR 1-Best: Using Word Confusion Networks for Spoken Language Understanding},
author = {Hakkani-Tur D., Bechet F., Riccardi G. and Tur G.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/CSL-pivot-slu.pdf},
year = {2006},
date = {2006-01-01},
journal = {Computer Speech and Language, vol. 20, no. 4, pp. 495-514, 2006},
abstract = {We are interested in the problem of robust understanding from noisy spontaneous speech input. With the advances in automated speech recognition (ASR), there has been increasing interest in spoken language understanding (SLU). A challenge in large vocabulary spoken language understanding is robustness to ASR errors. State of the art spoken language understanding relies on the best ASR hypotheses (ASR 1-best). In this paper, we propose methods for a tighter integration of ASR and SLU using word confusion networks (WCNs). WCNs obtained from ASR word graphs (lattices) provide a compact representation of multiple aligned ASR hypotheses along with word confidence scores, without compromising recognition accuracy. We present our work on exploiting WCNs instead of simply using ASR one-best hypotheses. In this work, we focus on the tasks of named entity detection and extraction and call classification in a spoken dialog system, although the idea is more general and applicable to other spoken language processing tasks. For named entity detection, we have improved the F-measure by using both word lattices and WCNs, 6–10% absolute. The processing of WCNs was 25 times faster than lattices, which is very important for real-life applications. For call classification, we have shown between 5% and 10% relative reduction in error rate using WCNs compared to ASR 1-best output.},
keywords = {Language Modeling, Speech Processing}
}
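The word confusion network idea is easy to make concrete. The following toy sketch, with invented words and posteriors, shows a WCN as a list of alignment slots and contrasts the 1-best path with posterior-weighted expected word counts, the kind of soft feature that lets a classifier exploit all hypotheses rather than only the 1-best.

from collections import defaultdict

# Toy WCN: each slot holds alternative words with posterior probabilities.
wcn = [
    [("i", 0.9), ("hi", 0.1)],
    [("want", 0.8), ("wants", 0.2)],
    [("to", 1.0)],
    [("check", 0.6), ("czech", 0.4)],
    [("my", 1.0)],
    [("bill", 0.7), ("bills", 0.3)],
]

def one_best(wcn):
    # The conventional ASR 1-best path: the top word in each slot.
    return [max(slot, key=lambda wp: wp[1])[0] for slot in wcn]

def expected_counts(wcn):
    # Soft bag-of-words features: each word weighted by its posterior.
    counts = defaultdict(float)
    for slot in wcn:
        for word, post in slot:
            counts[word] += post
    return dict(counts)

print(one_best(wcn))
print(expected_counts(wcn))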
2004
@conference{hakkanitur2004unsupervised,
title = {Unsupervised and Active Learning in Automatic Speech Recognition for Call Classification},
author = {Hakkani-Tur D., Tur G., Rahim M. and Riccardi G.},
year = {2004},
date = {2004-01-01},
journal = {Proc. ICASSP, Montreal, May 2004},
keywords = {Language Modeling, Machine Learning, Speech Processing}
}
2000
@article{riccardi2000spoken,
title = {Spoken language adaptation over time and state in a natural spoken dialog system},
author = {Riccardi G. and Gorin A. L.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/IEEETSLP00-LMAdapt.pdf},
year = {2000},
date = {2000-01-01},
journal = {IEEE Trans. on Speech and Audio Processing, vol. 8, pp. 3-10, 2000},
abstract = {We are interested in adaptive spoken dialog systems for automated services. People's spoken language usage varies over time for a given task, and furthermore varies depending on the state of the dialog. Thus, it is crucial to adapt automatic speech recognition (ASR) language models to these varying conditions. We characterize and quantify these variations based on a database of 30K user-transactions with AT&T's experimental How May I Help You? spoken dialog system. We describe a novel adaptation algorithm for language models with time and dialog-state varying parameters. Our language adaptation framework allows for recognizing and understanding unconstrained speech at each stage of the dialog, enabling context-switching and error recovery. These models have been used to train state-dependent ASR language models. We have evaluated their performance with respect to word accuracy and perplexity over time and dialog states. We have achieved a reduction of 40% in perplexity and of 8.4% in word error rate over the baseline system, averaged across all dialog states.},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
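As a deliberately simplified illustration of dialog-state-dependent language model adaptation (plain linear interpolation, not the time- and state-varying algorithm of the paper), the sketch below mixes a sparse state-specific unigram model with a state-independent background model. The corpora, state names, and interpolation weight are invented for the example.

from collections import Counter

def unigram(corpus):
    counts = Counter(w for utt in corpus for w in utt.split())
    total = sum(counts.values())
    return {w: c / total for w, c in counts.items()}

background = unigram(["yes please", "no thanks", "billing question", "operator"])
state_lms = {
    "confirm": unigram(["yes please", "yes", "no thanks", "no"]),
}

def adapted_prob(word, state, lam=0.6, floor=1e-6):
    # P(w | state) = lam * P_state(w) + (1 - lam) * P_background(w);
    # unseen dialog states fall back to the background model alone.
    p_state = state_lms.get(state, {}).get(word, 0.0)
    p_bg = background.get(word, floor)
    return lam * p_state + (1.0 - lam) * p_bg

print(adapted_prob("yes", "confirm"), adapted_prob("yes", "greeting"))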
1999
@conference{riccardi1999learning,
title = {Learning head-dependency relations from unannotated corpora},
author = {Riccardi G., Bangalore S. and Sarin P.},
year = {1999},
date = {1999-12-01},
journal = {Proc. IEEE ASRU, Keystone, Colorado, Dec. 1999},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{gorin1999spoken,
title = {Spoken language variation over time and state in a natural spoken dialog system},
author = {Gorin A. L. and Riccardi G.},
year = {1999},
date = {1999-03-01},
journal = {Proc. ICASSP, Phoenix, Mar. 1999},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{rose1999modeling,
title = {Modeling dysfluency and background events in ASR for a natural language understanding task},
author = {Rose R. C. and Riccardi G.},
year = {1999},
date = {1999-03-01},
journal = {Proc. ICASSP, Phoenix, Mar. 1999},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{rahim1999robust,
title = {Robust automatic speech recognition in a natural spoken dialog},
author = {Rahim M., Riccardi G., Wright J., Buntschuh B. and Gorin A.},
year = {1999},
date = {1999-01-01},
journal = {Workshop on Robust Methods for Speech Recognition in Adverse Conditions, Tampere, Finland, 1999},
keywords = {Language Modeling, Speech Processing}
}
@article{arai1999grammar,
title = {Grammar fragment acquisition using syntactic and semantic clustering},
author = {Arai K., Wright J. H., Riccardi G. and Gorin A. L.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/fragclustering-speechcomm-19981.pdf},
year = {1999},
date = {1999-01-01},
journal = {Speech Communication, vol. 27, no. 1, Jan. 1999},
abstract = {A new method for automatically acquiring Fragments for understanding fluent speech is proposed. The goal of this method is to generate a collection of Fragments, each representing a set of syntactically and semantically similar phrases. First, phrases observed frequently in the training set are selected as candidates. Each candidate phrase has three associated probability distributions: of following contexts, of preceding contexts, and of associated semantic actions. The similarity between candidate phrases is measured by applying the Kullback–Leibler distance to these three probability distributions. Candidate phrases that are close in all three distances are clustered into a Fragment. Salient sequences of these Fragments are then automatically acquired, and exploited by a spoken language understanding module to classify calls in AT&T's ``How may I help you?'' task. These Fragments allow us to generalize to unobserved phrases. For instance, they detected 246 phrases in the test-set that were not present in the training-set. This result shows that unseen phrases can be automatically discovered by our new method. Experimental results show that 2.8% of the improvement in call-type classification},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
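The clustering criterion in this abstract can be illustrated directly. The hypothetical sketch below computes a symmetrized Kullback–Leibler distance over the three context distributions of two candidate phrases and clusters them only if all three distances are small; the distributions, smoothing constant, and threshold are invented for the example.

import math

def kl(p, q, eps=1e-9):
    # KL divergence over the union of supports, with epsilon smoothing.
    support = set(p) | set(q)
    return sum(p.get(x, eps) * math.log(p.get(x, eps) / q.get(x, eps))
               for x in support)

def phrase_distance(a, b):
    # a, b: dicts of 'next' (following-context), 'prev' (preceding-context),
    # and 'action' (semantic-action) distributions for two candidate phrases.
    return [0.5 * (kl(a[k], b[k]) + kl(b[k], a[k]))
            for k in ("next", "prev", "action")]

p1 = {"next": {"to": 0.7, "a": 0.3}, "prev": {"like": 0.6, "want": 0.4},
      "action": {"collect_call": 0.9, "billing": 0.1}}
p2 = {"next": {"to": 0.6, "a": 0.4}, "prev": {"like": 0.5, "want": 0.5},
      "action": {"collect_call": 0.8, "billing": 0.2}}

# Cluster the two phrases into one fragment only if all three distances
# fall below the (illustrative) threshold.
print(all(d < 0.1 for d in phrase_distance(p1, p2)))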
1998
@conference{riccardi1998stochastic,
title = {Stochastic language models for speech recognition and understanding},
author = {Riccardi G. and Gorin A. L.},
year = {1998},
date = {1998-11-01},
journal = {Proc. ICSLP, Sydney, Nov. 1998},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{riccardi1998adaptation,
title = {Language model adaptation for spoken dialog systems},
author = {Riccardi G., Potamianos A. and Narayanan S.},
year = {1998},
date = {1998-11-01},
journal = {Proc. ICSLP, Sydney, Nov. 1998},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{rose1998integration,
title = {Integration of utterance verification with statistical language modeling and spoken language understanding},
author = {Rose R. C., Yao H., Riccardi G. and Wright J.},
year = {1998},
date = {1998-05-01},
journal = {Proc. ICASSP, Seattle, May 1998},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{riccardi1998phrase,
title = {Automatic acquisition of phrase grammars for stochastic language modeling},
author = {Riccardi G. and Bangalore S.},
year = {1998},
date = {1998-01-01},
journal = {Proc. 6th ACL Workshop on Very Large Corpora, Montreal, 1998},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
1997
@conference{arai1997fragment,
title = {Grammar fragment acquisition using syntactic and semantic clustering},
author = {Arai K., Wright J., Riccardi G. and Gorin A.},
year = {1997},
date = {1997-12-01},
journal = {Proc. Workshop Spoken Language Understanding & Communication, Yokosuka, Japan, Dec. 1997},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{wright1997salient,
title = {Automatic acquisition of salient grammar fragments for call-type classification},
author = {Wright J. H., Gorin A. L. and Riccardi G.},
year = {1997},
date = {1997-01-01},
journal = {Proc. EUROSPEECH, Rhodes, Greece, 1997, pp. 1419-1422},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
1996
@article{riccardi1996stochastic,
title = {Stochastic automata for language modeling},
author = {Riccardi G., Pieraccini R. and Bocchieri E.},
url = {https://sisl.disi.unitn.it/wp-content/uploads/2014/11/csl96.pdf},
year = {1996},
date = {1996-01-01},
journal = {Computer Speech and Language, vol. 10, no. 4, pp. 265-293, 1996},
abstract = {Stochastic language models are widely used in spoken language understanding to recognize and interpret the speech signal: the speech samples are decoded into word transcriptions by means of acoustic and syntactic models and then interpreted according to a semantic model. Both for speech recognition and understanding, search algorithms use stochastic models to extract the most likely uttered sentence and its correspondent interpretation. The design of the language models has to be effective in order to mostly constrain the search algorithms and has to be efficient to comply with the storage space limits. In this work we present the Variable N-gram Stochastic Automaton (VNSA) language model that provides a unified formalism for building a wide class of language models. First, this approach allows for the use of accurate language models for large vocabulary speech recognition by using the standard search algorithm in the one-pass Viterbi decoder. Second, the unified formalism is an effective approach to incorporate different sources of information for computing the probability of word sequences. Third, the VNSAs are well suited for those applications where speech and language decoding cascades are implemented through weighted rational transductions. The VNSAs have been compared to standard bigram and trigram language models and their reduced set of parameters does not affect by any means the performances in terms of perplexity. The design of a stochastic language model through the VNSA is described and applied to word and phrase class-based language models. The effectiveness of VNSAs has been tested within the Air Travel Information System (ATIS) task to build the language model.},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
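The variable-order idea behind the VNSA can be illustrated without the full weighted-automaton machinery. The sketch below (a hypothetical simplification, not the VNSA construction) keeps a state only for histories seen often enough and backs off to shorter histories otherwise; the thresholds and toy corpus are invented.

from collections import defaultdict

class VariableNgram:
    def __init__(self, max_order=3, min_count=2):
        self.max_order = max_order
        self.min_count = min_count
        # Counts of successor words for every history of length 0..max_order-1.
        self.ctx_counts = defaultdict(lambda: defaultdict(int))

    def train(self, sentences):
        for s in sentences:
            words = ["<s>"] * (self.max_order - 1) + s.split() + ["</s>"]
            for i in range(self.max_order - 1, len(words)):
                for n in range(self.max_order):
                    ctx = tuple(words[i - n:i])
                    self.ctx_counts[ctx][words[i]] += 1

    def prob(self, word, history):
        # Back off to the longest history whose state is reliable enough.
        for n in range(self.max_order - 1, -1, -1):
            ctx = tuple(history[-n:]) if n else ()
            succ = self.ctx_counts.get(ctx)
            if succ and sum(succ.values()) >= self.min_count:
                return succ.get(word, 0) / sum(succ.values())
        return 0.0

lm = VariableNgram()
lm.train(["show me flights to boston", "show me flights to dallas"])
print(lm.prob("flights", ["show", "me"]))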
1995
@conference{riccardi1995non,
title = {Non deterministic stochastic language models for speech recognition},
author = {Riccardi G., Bocchieri E. and Pieraccini R.},
year = {1995},
date = {1995-01-01},
journal = {Proc. ICASSP, Detroit, pp. 247-250, 1995},
keywords = {Conversational and Interactive Systems , Language Modeling, Speech Processing}
}
@conference{bocchieri1995chronus,
title = {The 1994 AT&T ATIS CHRONUS recognizer},
author = {Bocchieri E., Riccardi G. and Anantharaman J.},
year = {1995},
date = {1995-01-01},
journal = {Proc. 1995 ARPA Spoken Language Technology Workshop, Austin, Texas, Jan. 1995, pp. 265-268},
keywords = {Language Modeling, Signal Annotation and Interpretation}
}
1994
@conference{bocchieri1994atis,
title = {The 1993 AT&T ATIS system},
author = {Bocchieri E. and Riccardi G.},
year = {1994},
date = {1994-01-01},
journal = {Proc. 1994 ARPA Spoken Language Technology Workshop, Plainsboro, NJ, March 1994, pp. 41-42},
keywords = {Language Modeling, Signal Annotation and Interpretation, Speech Processing}
}