Entry Guo:2010:LIS from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017                Valid HTML 3.2!

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@Article{Guo:2010:LIS,
  author =       "Yuqing Guo and Haifeng Wang and Josef van Genabith",
  title =        "A Linguistically Inspired Statistical Model for
                 {Chinese} Punctuation Generation",
  journal =      j-TALIP,
  volume =       "9",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1781134.1781136",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 21 18:03:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article investigates a relatively underdeveloped
                 subject in natural language processing---the generation
                 of punctuation marks. From a theoretical perspective,
                 we study 16 Chinese punctuation marks as defined in the
                 Chinese national standard of punctuation usage, and
                 categorize these punctuation marks into three different
                 types according to their syntactic properties. We
                 implement a three-tier maximum entropy model
                 incorporating linguistically-motivated features for
                 generating the commonly used Chinese punctuation marks
                 in unpunctuated sentences output by a surface realizer.
                 Furthermore, we present a method to automatically
                 extract cue words indicating sentence-final punctuation
                 marks as a specialized feature to construct a more
                 precise model. Evaluating on the Penn Chinese Treebank
                 data, the MaxEnt model achieves an {\em f\/}-score of
                 79.83\% for punctuation insertion and 74.61\% for
                 punctuation restoration using gold data input, 79.50\%
                 for insertion and 73.32\% for restoration using
                 parser-based imperfect input. The experiments show that
                 the MaxEnt model significantly outperforms a baseline
                 5-gram language model that scores 54.99\% for
                 punctuation insertion and 52.01\% for restoration. We
                 show that our results are not far from human
                 performance on the same task with human insertion {\em
                 f\/}-scores in the range of 81--87\% and human
                 restoration in the range of 71--82\%. Finally, a manual
                 error analysis of the generation output shows that
                 close to 40\% of the mismatched punctuation marks do in
                 fact result in acceptable choices, a fact obscured in
                 the automatic string-matching based evaluation
                 scores.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Chinese punctuation marks; maximum entropy model;
                 sentence realization",
}

Related entries