Entry Naptali:2010:TDL from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017

BibTeX entry

@Article{Naptali:2010:TDL,
  author =       "Welly Naptali and Masatoshi Tsuchiya and Seiichi
                 Nakagawa",
  title =        "Topic-Dependent Language Model with Voting on Noun
                 History",
  journal =      j-TALIP,
  volume =       "9",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1781134.1781137",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 21 18:03:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Language models (LMs) are an important field of study
                 in automatic speech recognition (ASR) systems. LM helps
                 acoustic models find the corresponding word sequence of
                 a given speech signal. Without it, ASR systems would
                 not understand the language and it would be hard to
                 find the correct word sequence. During the past few
                 years, researchers have tried to incorporate long-range
                 dependencies into statistical word-based $n$-gram LMs.
                 One of these long-range dependencies is topic. Unlike
                 words, topic is unobservable. Thus, it is required to
                 find the meanings behind the words to get into the
                 topic. This research is based on the belief that nouns
                 contain topic information. We propose a new approach
                 for a topic-dependent LM, where the topic is decided in
                 an unsupervised manner. Latent Semantic Analysis (LSA)
                 is employed to reveal hidden (latent) relations among
                 nouns in the context words. To decide the topic of an
                 event, a fixed size word history sequence (window) is
                 observed, and voting is then carried out based on noun
                 class occurrences weighted by a confidence measure.
                 Experiments were conducted on an English corpus and a
                 Japanese corpus: {\em The Wall Street Journal\/} corpus
                 and {\em Mainichi Shimbun\/} (Japanese newspaper)
                 corpus. The results show that our proposed method gives
                 better perplexity than the comparative baselines,
                 including a word-based/class-based $n$-gram LM, their
                 interpolated LM, a cache-based LM, a topic-dependent LM
                 based on $n$-gram, and a topic-dependent LM based on
                 Latent Dirichlet Allocation (LDA). The {\em n\/} -best
                 list rescoring was conducted to validate its
                 application in ASR systems.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Language model; latent semantic analysis; perplexity;
                 speech recognition; topic dependent",
}
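
Illustrative sketches

A minimal Python sketch of the LSA step the abstract describes: build a
noun-by-context co-occurrence count matrix, factor it with a truncated
SVD, and group the nouns into latent classes. The toy counts, the number
of LSA dimensions, and the use of k-means as the classing step are
illustrative assumptions, not details taken from the paper.

import numpy as np

def lsa_noun_classes(cooccurrence, n_dims=2, n_classes=2, n_iter=20, seed=0):
    """Project nouns into an LSA space and cluster them into classes.

    cooccurrence: (num_nouns, num_contexts) count matrix.
    Returns (labels, noun_vecs): a class id per noun and the LSA vectors.
    """
    # Truncated SVD: keep only the top n_dims singular directions.
    u, s, _ = np.linalg.svd(cooccurrence.astype(float), full_matrices=False)
    noun_vecs = u[:, :n_dims] * s[:n_dims]  # LSA coordinates per noun

    # Simple k-means over the LSA vectors (assumed classing step).
    rng = np.random.default_rng(seed)
    centers = noun_vecs[rng.choice(len(noun_vecs), n_classes, replace=False)]
    for _ in range(n_iter):
        labels = np.argmin(
            np.linalg.norm(noun_vecs[:, None] - centers[None], axis=2), axis=1)
        for k in range(n_classes):
            if np.any(labels == k):
                centers[k] = noun_vecs[labels == k].mean(axis=0)
    return labels, noun_vecs

# Toy counts: rows are nouns, columns are context words.
counts = np.array([[5, 0, 1],
                   [4, 1, 0],
                   [0, 5, 4],
                   [1, 4, 5]])
labels, _ = lsa_noun_classes(counts)
print(labels)  # two latent noun classes, e.g. [0 0 1 1]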
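
The abstract's topic decision is a vote over the noun history. A minimal
sketch, assuming a dict-based noun-to-class table and a per-noun
confidence weight (both placeholders; the paper's actual confidence
measure is not specified here): scan a fixed-size window of preceding
words, let each noun vote for its class, and take the class with the
largest weighted vote as the current topic.

from collections import defaultdict

def vote_topic(history, noun_class, confidence, window=5):
    """Return the topic (noun class id) for the current position.

    history:    preceding words, most recent last.
    noun_class: dict word -> class id (only nouns appear as keys).
    confidence: dict word -> vote weight.
    """
    votes = defaultdict(float)
    for word in history[-window:]:  # fixed-size observation window
        if word in noun_class:      # only nouns carry topic votes
            votes[noun_class[word]] += confidence.get(word, 1.0)
    if not votes:
        return None                 # no noun in window; fall back to a general LM
    return max(votes, key=votes.get)

# With "market" and "stocks" in class 0 and "team" in class 1, a
# finance-heavy window votes class 0 (0.9 + 0.8 beats 0.7).
noun_class = {"market": 0, "stocks": 0, "team": 1}
confidence = {"market": 0.9, "stocks": 0.8, "team": 0.7}
print(vote_topic(["the", "market", "and", "stocks", "team"],
                 noun_class, confidence))  # -> 0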
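
The comparison the abstract reports is in terms of perplexity. Below is
a minimal sketch of how a topic-conditioned probability might be
combined with a general n-gram probability, and how perplexity is
computed from per-word probabilities; the linear interpolation and the
weight lam are illustrative assumptions, not the paper's exact
formulation.

import math

def interpolate(p_topic, p_general, lam=0.5):
    """Linearly interpolate topic-conditioned and general LM probabilities."""
    return lam * p_topic + (1.0 - lam) * p_general

def perplexity(probs):
    """Perplexity of a test sequence given its per-word probabilities."""
    return math.exp(-sum(math.log(p) for p in probs) / len(probs))

# Higher per-word probabilities on the test text mean lower perplexity,
# which is the direction of improvement the abstract claims.
print(perplexity([0.10, 0.20, 0.05]))  # baseline-like: ~10.0
print(perplexity([0.20, 0.30, 0.10]))  # topic-conditioned: ~5.50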

Related entries