Entry Ma:2009:BMW from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017                Valid HTML 3.2!

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@Article{Ma:2009:BMW,
  author =       "Yanjun Ma and Andy Way",
  title =        "Bilingually Motivated Word Segmentation for
                 Statistical Machine Translation",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526255",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "We introduce a bilingually motivated word segmentation
                 approach to languages where word boundaries are not
                 orthographically marked, with application to
                 Phrase-Based Statistical Machine Translation (PB-SMT).
                 Our approach is motivated from the insight that PB-SMT
                 systems can be improved by optimizing the input
                 representation to reduce the predictive power of
                 translation models. We firstly present an approach to
                 optimize the existing segmentation of both source and
                 target languages for PB-SMT and demonstrate the
                 effectiveness of this approach using a Chinese--English
                 MT task, that is, to measure the influence of the
                 segmentation on the performance of PB-SMT systems. We
                 report a 5.44\% relative increase in BLEU score and a
                 consistent increase according to other metrics. We then
                 generalize this method for Chinese word segmentation
                 without relying on any segmenters and show that using
                 our segmentation PB-SMT can achieve more consistent
                 state-of-the-art performance across two domains. There
                 are two main advantages of our approach. First of all,
                 it is adapted to the specific translation task at hand
                 by taking the corresponding source (target) language
                 into account. Second, this approach does not rely on
                 manually segmented training data so that it can be
                 automatically adapted for different domains.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "alignment; bilingually motivated; phrase-based
                 statistical machine translation; word segmentation",
}

Related entries