Entry Andrade:2012:SEC from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017                Valid HTML 3.2!

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@Article{Andrade:2012:SEC,
  author =       "Daniel Andrade and Takuya Matsuzaki and Jun'ichi
                 Tsujii",
  title =        "Statistical Extraction and Comparison of Pivot Words
                 for Bilingual Lexicon Extension",
  journal =      j-TALIP,
  volume =       "11",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2012",
  DOI =          "https://doi.org/10.1145/2184436.2184439",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 12 11:20:16 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Bilingual dictionaries can be automatically extended
                 by new translations using comparable corpora. The
                 general idea is based on the assumption that similar
                 words have similar contexts across languages. However,
                 previous studies have mainly focused on Indo-European
                 languages, or use only a bag-of-words model to describe
                 the context. Furthermore, we argue that it is helpful
                 to extract only the statistically significant context,
                 instead of using all context. The present approach
                 addresses these issues in the following manner. First,
                 based on the context of a word with an unknown
                 translation (query word), we extract salient pivot
                 words. Pivot words are words for which a translation is
                 already available in a bilingual dictionary. For the
                 extraction of salient pivot words, we use a Bayesian
                 estimation of the point-wise mutual information to
                 measure statistical significance. In the second step,
                 we match these pivot words across languages to identify
                 translation candidates for the query word. We therefore
                 calculate a similarity score between the query word and
                 a translation candidate using the probability that the
                 same pivots will be extracted for both the query word
                 and the translation candidate. The proposed method uses
                 several context positions, namely, a bag-of-words of
                 one sentence, and the successors, predecessors, and
                 siblings with respect to the dependency parse tree of
                 the sentence. In order to make these context positions
                 comparable across Japanese and English, which are
                 unrelated languages, we use several heuristics to
                 adjust the dependency trees appropriately. We
                 demonstrate that the proposed method significantly
                 increases the accuracy of word translations, as
                 compared to previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing (TALIP)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

Related entries