BibTeX bibliography talip.bib

%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.50",
%%%     date            = "14 October 2017",
%%%     time            = "10:26:28 MDT",
%%%     filename        = "talip.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "52522 8478 41180 402838",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "Asian language information processing,
%%%                        bibliography, BibTeX, TALIP",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Asian Language
%%%                        Information Processing (TALIP) (CODEN none,
%%%                        ISSN 1530-0226 (print), 1558-3430
%%%                        (electronic)), which began publishing in
%%%                        March 2002.
%%%
%%%                        Publication ceased with volume 13, number 4,
%%%                        in 2014 when the journal was renamed to ACM
%%%                        Transactions on Asian and Low-Resource
%%%                        Language Information Processing (TALLIP).
%%%                        The new journal is covered in a separate
%%%                        bibliography, tallip.bib.
%%%
%%%                        The journal has a World Wide Web site at
%%%
%%%                            http://www.acm.org/pubs/talip/
%%%                            http://portal.acm.org/browse_dl.cfm?&idx=J820
%%%
%%%                        At version 1.50, the year coverage looked
%%%                        like this:
%%%
%%%                             2002 (  15)    2007 (  14)    2012 (  18)
%%%                             2003 (  22)    2008 (  13)    2013 (  17)
%%%                             2004 (  17)    2009 (  19)    2014 (  18)
%%%                             2005 (  17)    2010 (  15)
%%%                             2006 (  28)    2011 (  21)
%%%
%%%                             Article:        234
%%%
%%%                             Total entries:  234
%%%
%%%                        This bibliography has been constructed
%%%                        primarily from the publisher Web site.
%%%
%%%                        Numerous errors in the sources noted above
%%%                        have been corrected.  Spelling has been
%%%                        verified with the UNIX spell and GNU ispell
%%%                        programs using the exception dictionary
%%%                        stored in the companion file with extension
%%%                        .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen as
%%%                        name:year:abbrev, where name is the family
%%%                        name of the first author or editor, year is a
%%%                        4-digit number, and abbrev is a 3-letter
%%%                        condensation of important title words.
%%%                        Citation labels were automatically generated
%%%                        by software developed for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, with the help of
%%%                        ``bibsort -byvolume''.  The bibsort utility
%%%                        is available from ftp.math.utah.edu in
%%%                        /pub/tex/bib.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================

%%% LaTeX hyphenation-exception list emitted ahead of the bibliography
%%% (the list is currently empty).
@preamble{ "\hyphenation{ }" }

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% ack-nhfb: postal, telephone, e-mail and Web contact details for
%%% Nelson H. F. Beebe; entries reference this macro from their
%%% acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@string{j-TALIP = {ACM Transactions on Asian Language Information Processing}}

%%% ====================================================================
%%% Bibliography entries:

@article{Wong:2002:P,
  author          = {Kam-Fai Wong and Jun'ichi Tsujii},
  title           = {Prologue},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {1--2},
  month           = mar,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Gao:2002:TUA,
  author          = {Jianfeng Gao and Joshua Goodman and Mingjing Li and Kai-Fu Lee},
  title           = {Toward a unified approach to statistical language modeling
                     for {Chinese}},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {3--33},
  month           = mar,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/509900.509903},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lai:2002:MTE,
  author          = {Yu-Sheng Lai and Chung-Hsien Wu},
  title           = {Meaningful term extraction and discriminative term selection
                     in text categorization via unknown-word methodology},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {34--64},
  month           = mar,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/509900.509904},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Kim:2002:MBG,
  author          = {Byeongchang Kim and Gary Geunbae Lee and Jong-Hyeok Lee},
  title           = {Morpheme-based grapheme to phoneme conversion using phonetic
                     patterns and morphophonemic connectivity information},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {65--82},
  month           = mar,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lee:2002:UTI,
  author          = {Tan Lee and Wai Lau and Y. W. Wong and P. C. Ching},
  title           = {Using tone information in {Cantonese} continuous speech
                     recognition},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {83--102},
  month           = mar,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/509900.509906},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Chen:2002:BCE,
  author          = {Hsin-Hsi Chen and Chi-Ching Lin and Wen-Cheng Lin},
  title           = {Building a {Chinese--English} wordnet for translingual
                     applications},
  journal         = j-TALIP,
  volume          = {1},
  number          = {2},
  pages           = {103--122},
  month           = jun,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/568954.568955},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:36 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Meng:2002:GPM,
  author          = {Helen Meng and Po-Chui Luk and Kui Xu and Fuliang Weng},
  title           = {{GLR} parsing with multiple grammars for natural language
                     queries},
  journal         = j-TALIP,
  volume          = {1},
  number          = {2},
  pages           = {123--144},
  month           = jun,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/568954.568956},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:36 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Murata:2002:CTM,
  author          = {Masaki Murata and Qing Ma and Hitoshi Isahara},
  title           = {Comparison of three machine-learning methods for {Thai}
                     part-of-speech tagging},
  journal         = j-TALIP,
  volume          = {1},
  number          = {2},
  pages           = {145--158},
  month           = jun,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/568954.568957},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:36 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lu:2002:TWQ,
  author          = {Wen-Hsiang Lu and Lee-Feng Chien and Hsi-Jian Lee},
  title           = {Translation of {Web} queries using anchor text mining},
  journal         = j-TALIP,
  volume          = {1},
  number          = {2},
  pages           = {159--172},
  month           = jun,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/568954.568958},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:36 MST 2002},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Li:2002:WBA,
  author          = {Wenjie Li and Kam-Fai Wong},
  title           = {A word-based approach for modeling and discovering temporal
                     relations embedded in {Chinese} sentences},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {173--206},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lee:2002:ACB,
  author          = {Jin-Seok Lee and Byeongchang Kim and Gary Geunbae Lee},
  title           = {Automatic corpus-based tone and break-index prediction using
                     {K-ToBI} representation},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {207--224},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Luk:2002:CCD,
  author          = {Robert W. P. Luk and K. L. Kwok},
  title           = {A comparison of {Chinese} document indexing strategies and
                     retrieval models},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {225--268},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Suzuki:2002:LCS,
  author          = {Izumi Suzuki and Yoshiki Mikami and Ario Ohsato and
                     Yoshihide Chubachi},
  title           = {A language and character set determination method based on
                     {N}-gram statistics},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {269--278},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Jin:2002:CDC,
  author          = {Honglan Jin and Kam-Fai Wong},
  title           = {A {Chinese} dictionary construction algorithm for
                     information retrieval},
  journal         = j-TALIP,
  volume          = {1},
  number          = {4},
  pages           = {281--296},
  month           = dec,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:01 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Li:2002:CCB,
  author          = {Yuanxiang Li and Xiaoqing Ding and Chew Lim Tan},
  title           = {Combining character-based bigrams with word-based bigrams in
                     contextual postprocessing for {Chinese} script recognition},
  journal         = j-TALIP,
  volume          = {1},
  number          = {4},
  pages           = {297--309},
  month           = dec,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:01 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lo:2003:CLS,
  author          = {Wai-Kit Lo and Helen Meng and P. C. Ching},
  title           = {Cross-language spoken document retrieval using {HMM}-based
                     retrieval model with multi-scale fusion},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {1--26},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Shi:2003:OHC,
  author          = {Daming Shi and Robert I. Damper and Steve R. Gunn},
  title           = {Offline handwritten {Chinese} character recognition by
                     radical decomposition},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {27--48},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Lee:2003:TAS,
  author          = {Yue-Shi Lee},
  title           = {Task adaptation in stochastic language model for {Chinese}
                     homophone disambiguation},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {49--62},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Shieh:2003:EAT,
  author          = {Jiann-Cherng Shieh},
  title           = {An efficient accessing technique for {Taiwanese} phonetic
                     transcriptions},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {63--77},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Oard:2003:SLE,
  author          = {Douglas W. Oard},
  title           = {The surprise language exercises},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {79--84},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Allan:2003:MTD,
  author          = {James Allan and Victor Lavrenko and Margaret E. Connell},
  title           = {A month to topic detection and tracking in {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {85--100},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Strassel:2003:LRC,
  author          = {Stephanie Strassel and Mike Maxwell and Christopher Cieri},
  title           = {Linguistic resource creation for research and technology
                     development: a recent experiment},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {101--117},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Dorr:2003:RPD,
  author          = {Bonnie J. Dorr and Necip Fazil Ayan and Nizar Habash and
                     Nitin Madnani and Rebecca Hwa},
  title           = {Rapid porting of {DUSTer} to {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {118--123},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Huang:2003:ENE,
  author          = {Fei Huang and Stephan Vogel and Alex Waibel},
  title           = {Extracting named entity translingual equivalence with
                     limited resources},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {124--129},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Larkey:2003:HCT,
  author          = {Leah S. Larkey and Margaret E. Connell and Nasreen
                     Abduljaleel},
  title           = {{Hindi CLIR} in thirty days},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {130--142},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

%%% The sixth author's family name is the compound "Font Llitj{\'o}s".
%%% It is entered in "Last, First" form so that BibTeX does not treat
%%% "Font" as a given name when sorting or abbreviating.
@Article{Lavie:2003:EHE,
  author =       "Alon Lavie and Stephan Vogel and Lori Levin and Erik
                 Peterson and Katharina Probst and Font Llitj{\'o}s,
                 Ariadna and Rachel Reynolds and Jaime Carbonell and
                 Richard Cohen",
  title =        "Experiments with a {Hindi-to-English} transfer-based
                 {MT} system under a miserly data scenario",
  journal =      j-TALIP,
  volume =       "2",
  number =       "2",
  pages =        "143--163",
  month =        jun,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Nov 4 08:37:35 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@article{Xu:2003:CLR,
  author          = {Jinxi Xu and Ralph Weischedel},
  title           = {Cross-lingual retrieval for {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {164--168},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{May:2003:SWC,
  author          = {Jonathan May and Ada Brunstein and Prem Natarajan and
                     Ralph Weischedel},
  title           = {Surprise! {What}'s in a {Cebuano} or {Hindi Name?}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {169--180},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Sekine:2003:HEC,
  author          = {Satoshi Sekine and Ralph Grishman},
  title           = {{Hindi-English} cross-lingual question-answering system},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {181--192},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Ma:2003:AHO,
  author          = {Huanfeng Ma and David Doermann},
  title           = {Adaptive {Hindi OCR} using generalized {Hausdorff} image
                     comparison},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {193--218},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{He:2003:MMI,
  author          = {Daqing He and Douglas W. Oard and Jianqiang Wang and Jun Luo
                     and Dina Demner-Fushman and Kareem Darwish and Philip Resnik
                     and Sanjeev Khudanpur and Michael Nossal and Michael Subotin
                     and Anton Leuski},
  title           = {Making {MIRACLEs}: {Interactive} translingual search for
                     {Cebuano} and {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {219--244},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Leuski:2003:CLC,
  author          = {Anton Leuski and Chin-Yew Lin and Liang Zhou and Ulrich
                     Germann and Franz Josef Och and Eduard Hovy},
  title           = {Cross-lingual {C*ST*RD}: {English} access to {Hindi}
                     information},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {245--269},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Dorr:2003:CLH,
  author          = {Bonnie Dorr and David Zajic and Richard Schwartz},
  title           = {Cross-language headline generation for {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {270--289},
  month           = sep,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Li:2003:RDH,
  author          = {Wei Li and Andrew McCallum},
  title           = {Rapid development of {Hindi} named entity recognition
                     using conditional random fields and feature induction},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {290--294},
  month           = sep,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Maynard:2003:RCI,
  author          = {Diana Maynard and Valentin Tablan and
                     Kalina Bontcheva and Hamish Cunningham},
  title           = {Rapid customization of an information extraction
                     system for a surprise language},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {295--300},
  month           = sep,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kang:2003:IPP,
  author          = {Mi-Young Kang and Aesun Yoon and Hyuk-Chul Kwon},
  title           = {Improving partial parsing based on error-pattern
                     analysis for a {Korean} grammar-checker},
  journal         = j-TALIP,
  volume          = {2},
  number          = {4},
  pages           = {301--323},
  month           = dec,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kim:2003:RRE,
  author          = {Harksoo Kim and Jungyun Seo},
  title           = {Resolution of referring expressions in a {Korean}
                     multimodal dialogue system},
  journal         = j-TALIP,
  volume          = {2},
  number          = {4},
  pages           = {324--337},
  month           = dec,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mani:2004:ISI,
  author          = {Inderjeet Mani and James Pustejovsky and
                     Beth Sundheim},
  title           = {Introduction to the special issue on temporal
                     information processing},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {1--10},
  month           = mar,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Han:2004:FRT,
  author          = {Benjamin Han and Alon Lavie},
  title           = {A framework for resolution of time in natural
                     language},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {11--32},
  month           = mar,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Schilder:2004:EMT,
  author          = {Frank Schilder},
  title           = {Extracting meaning from temporal nouns and temporal
                     prepositions},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {33--50},
  month           = mar,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Jang:2004:ATT,
  author          = {Seok Bae Jang and Jennifer Baldwin and
                     Inderjeet Mani},
  title           = {Automatic {TIMEX2} tagging of {Korean} news},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {51--65},
  month           = mar,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Hobbs:2004:OTS,
  author          = {Jerry R. Hobbs and Feng Pan},
  title           = {An ontology of time for the {Semantic Web}},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {66--85},
  month           = mar,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Gao:2004:ISI,
  author          = {Jianfeng Gao and Chin-Yew Lin},
  title           = {Introduction to the special issue on statistical
                     language modeling},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {87--93},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kim:2004:LTL,
  author          = {Woosung Kim and Sanjeev Khudanpur},
  title           = {Lexical triggers and latent semantic analysis for
                     cross-lingual language model adaptation},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {94--112},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Linares:2004:HLM,
  author          = {Diego Linares and Jos{\'e}-Miguel Bened{\'\i} and
                     Joan-Andreu S{\'a}nchez},
  title           = {A hybrid language model based on a combination of
                     {$N$}-grams and stochastic context-free grammars},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {113--127},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chen:2004:DHG,
  author          = {Berlin Chen and Hsin-Min Wang and Lin-Shan Lee},
  title           = {A discriminative {HMM\slash N}-gram-based retrieval
                     approach for {Mandarin} spoken documents},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {128--145},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Nguyen:2004:EBS,
  author          = {Minh Le Nguyen and Susumu Horiguchi and
                     Akira Shimazu and Bao Tu Ho},
  title           = {Example-based sentence reduction using the hidden
                     {Markov} model},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {146--158},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Fung:2004:MEC,
  author          = {Pascale Fung and Grace Ngai and Yongsheng Yang and
                     Benfeng Chen},
  title           = {A maximum-entropy {Chinese} parser augmented by
                     transformation-based learning},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {159--168},
  month           = jun,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Li:2004:AMF,
  author          = {Yujia Li and Tan Lee and Yao Qian},
  title           = {Analysis and modeling of {F0} contours for {Cantonese}
                     text-to-speech},
  journal         = j-TALIP,
  volume          = {3},
  number          = {3},
  pages           = {169--180},
  month           = sep,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Huang:2004:UWB,
  author          = {Chien-Chung Huang and Shui-Lung Chuang and
                     Lee-Feng Chien},
  title           = {Using a {Web}-based categorization approach to
                     generate thematic metadata from texts},
  journal         = j-TALIP,
  volume          = {3},
  number          = {3},
  pages           = {190--212},
  month           = sep,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Myaeng:2004:ISI,
  author          = {Sung Hyon Myaeng},
  title           = {Introduction to the special issue on computer
                     processing of oriental languages},
  journal         = j-TALIP,
  volume          = {3},
  number          = {4},
  pages           = {213--213},
  month           = dec,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Baoli:2004:ANN,
  author =       "Li Baoli and Lu Qin and Yu Shiwen",
  title =        "An adaptive {$k$}-nearest neighbor text categorization
                 strategy",
  journal =      j-TALIP,
  volume =       "3",
  number =       "4",
  pages =        "215--226",
  month =        dec,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Apr 14 12:20:22 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Kim:2004:UTI,
  author          = {Pyung Kim and Sung Hyon Myaeng},
  title           = {Usefulness of temporal information automatically
                     extracted from news articles for topic tracking},
  journal         = j-TALIP,
  volume          = {3},
  number          = {4},
  pages           = {227--242},
  month           = dec,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Zhang:2004:ESS,
  author          = {Le Zhang and Jingbo Zhu and Tianshun Yao},
  title           = {An evaluation of statistical spam filtering
                     techniques},
  journal         = j-TALIP,
  volume          = {3},
  number          = {4},
  pages           = {243--269},
  month           = dec,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Wu:2005:DSF,
  author          = {Chung-Hsien Wu and Jui-Feng Yeh and Ming-Jun Chen},
  title           = {Domain-specific {FAQ} retrieval using independent
                     aspects},
  journal         = j-TALIP,
  volume          = {4},
  number          = {1},
  pages           = {1--17},
  month           = mar,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jul 7 13:48:21 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Murata:2005:CEV,
  author          = {Masaki Murata and Masao Utiyama and
                     Kiyotaka Uchimoto and Hitoshi Isahara and Qing Ma},
  title           = {Correction of errors in a verb modality corpus for
                     machine translation with a machine-learning method},
  journal         = j-TALIP,
  volume          = {4},
  number          = {1},
  pages           = {18--37},
  month           = mar,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jul 7 13:48:21 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Hendessi:2005:SSP,
  author          = {F. Hendessi and A. Ghayoori and T. A. Gulliver},
  title           = {A speech synthesizer for {Persian} text using a neural
                     network with a smooth ergodic {HMM}},
  journal         = j-TALIP,
  volume          = {4},
  number          = {1},
  pages           = {38--52},
  month           = mar,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jul 7 13:48:21 MDT 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Zhang:2005:COT,
  author          = {Ying Zhang and Phil Vines and Justin Zobel},
  title           = {{Chinese} {OOV} translation and post-translation query
                     expansion in {Chinese--English} cross-lingual
                     information retrieval},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {57--77},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Qu:2005:TES,
  author          = {Yan Qu and David A. Hull and Gregory Grefenstette and
                     David A. Evans and Motoko Ishikawa and Setsuko Nara and
                     Toshiya Ueda and Daisuke Noda and Kousaku Arita and
                     Yuki Funakoshi and Hiroshi Matsuda},
  title           = {Towards effective strategies for monolingual and
                     bilingual information retrieval: {Lessons} learned
                     from {NTCIR-4}},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {78--110},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Sakai:2005:FPR,
  author          = {Tetsuya Sakai and Toshihiko Manabe and
                     Makoto Koyama},
  title           = {Flexible pseudo-relevance feedback via selective
                     sampling},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {111--135},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kwok:2005:RRP,
  author          = {Kui Lam Kwok and Sora Choi and Norbert Dinstl},
  title           = {Rich results from poor resources: {NTCIR-4}
                     monolingual and cross-lingual retrieval of {Korean}
                     texts using {Chinese} and {English}},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {135--158},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Savoy:2005:CSM,
  author          = {Jacques Savoy},
  title           = {Comparative study of monolingual and multilingual
                     search models for use with {Asian} languages},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {159--185},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mase:2005:PTS,
  author          = {Hisao Mase and Tadataka Matsubayashi and
                     Yuichi Ogawa and Makoto Iwayama and Tadaaki Oshio},
  title           = {Proposal of two-stage patent retrieval method
                     considering the claim structure},
  journal         = j-TALIP,
  volume          = {4},
  number          = {2},
  pages           = {186--202},
  month           = jun,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Sat Dec 17 08:07:33 MST 2005},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Nakagawa:2005:PSI,
  author          = {Hiroshi Nakagawa and Tatsunori Mori and
                     Noriko Kando},
  title           = {Preface to the special issues on {NTCIR-4}},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {237--242},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kato:2005:ODQ,
  author          = {Tsuneaki Kato and Jun'ichi Fukumoto and Fumito Masui
                     and Noriko Kando},
  title           = {Are open-domain question answering technologies
                     useful for information access dialogues?---an
                     empirical study and a proposal of a novel challenge},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {243--262},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Isozaki:2005:AHP,
  author          = {Hideki Isozaki},
  title           = {An analysis of a high-performance {Japanese} question
                     answering system},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {263--279},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mori:2005:JQA,
  author          = {Tatsunori Mori},
  title           = {{Japanese} question-answering system using {A*} search
                     and its improvement},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {280--304},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mori:2005:MAF,
  author          = {Tatsunori Mori and Masanori Nozawa and
                     Yoshiaki Asada},
  title           = {Multi-answer-focused multi-document summarization
                     using a question-answering engine},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {305--320},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Okazaki:2005:ICO,
  author          = {Naoaki Okazaki and Yutaka Matsuo and
                     Mitsuru Ishizuka},
  title           = {Improving chronological ordering of sentences
                     extracted from multiple newspaper articles},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {321--339},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Yoshioka:2005:CPB,
  author          = {Masaharu Yoshioka and Makoto Haraguchi},
  title           = {On a combination of probabilistic and {Boolean} {IR}
                     models for {WWW} document retrieval},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {340--356},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Lingpeng:2005:CIR,
  author          = {Yang Lingpeng and Ji Donghong and Tang Li and
                     Niu Zhengyu},
  title           = {{Chinese} information retrieval based on terms and
                     relevant terms},
  journal         = j-TALIP,
  volume          = {4},
  number          = {3},
  pages           = {357--374},
  month           = sep,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Jan 26 08:28:41 MST 2006},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Sakai:2006:ISI,
  author =       "Tetsuya Sakai and Yuji Matsumoto",
  title =        "Introduction to the special issue: {Recent} advances
                 in information processing and access for {Japanese}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "375--376",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Doi:2006:EBM,
  author =       "Takao Doi and Hirofumi Yamamoto and Eiichiro Sumita",
  title =        "Example-based machine translation using efficient
                 sentence retrieval based on edit-distance",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "377--399",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Tomiura:2006:ESS,
  author =       "Yoichi Tomiura and Shosaku Tanaka and Toru Hitaka",
  title =        "Estimating satisfactoriness of selectional restriction
                 from corpus without a thesaurus",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "400--416",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Iida:2006:ARA,
  author =       "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
  title =        "Anaphora resolution by antecedent identification
                 followed by anaphoricity determination",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "417--434",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Inui:2006:ACK,
  author =       "Takashi Inui and Kentaro Inui and Yuji Matsumoto",
  title =        "Acquiring causal knowledge from text using the
                 connective marker {\em tame\/}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "435--474",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% Volume 4, number 4 is the December 2005 issue: volume 4 is dated 2005
%%% elsewhere in this file, and the bibdate below (February 2006) precedes
%%% December 2006.  The year is therefore 2005.  The citation key keeps
%%% its historical 2006 label so that existing citations still resolve.
@Article{Ma:2006:TSB,
  author =       "Qiang Ma and Katsumi Tanaka",
  title =        "Topic-structure-based complementary information
                 retrieval and its application",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "475--503",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% NOTE(review): the leading AUTHOR: in the title below looks like a
%%% placeholder inherited from the publisher metadata; confirm against
%%% the printed article before changing it.
@Article{Park:2006:ATM,
  author = {Jong C. Park and Gary Geunbae Lee and Limsoon Wong},
  title = {{AUTHOR}: {Text} mining and management in biomedicine},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {1--3},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Park:2006:MBB,
  author = {Kyung-Mi Park and Seon-Ho Kim and Hae-Chang Rim and
    Young-Sook Hwang},
  title = {{ME}-based biomedical named entity recognition using lexical
    knowledge},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {4--21},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Nenadic:2006:MSR,
  author = {Goran Nenadi{\'c} and Sophia Ananiadou},
  title = {Mining semantically related terms from biomedical literature},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {22--43},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kim:2006:ECI,
  author = {Jung-Jae Kim and Jong C. Park},
  title = {Extracting contrastive information from negation patterns in
    biomedical literature},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {44--60},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kim:2006:TPL,
  author = {Eunju Kim and Yu Song and Cheongjae Lee and Kyoungduk Kim and
    Gary Geunbae Lee and Byoung-Kee Yi and Jeongwon Cha},
  title = {Two-phase learning for biological event extraction and
    verification},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {61--73},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mima:2006:TBK,
  author = {Hideki Mima and Sophia Ananiadou and Katsumori Matsushima},
  title = {Terminology-based knowledge mining for new knowledge
    discovery},
  journal = j-TALIP,
  volume = {5},
  number = {1},
  pages = {74--88},
  month = mar,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu May 11 11:29:25 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Carpuat:2006:AWS,
  author = {Marine Carpuat and Pascale Fung and Grace Ngai},
  title = {Aligning word senses using bilingual corpora},
  journal = j-TALIP,
  volume = {5},
  number = {2},
  pages = {89--120},
  month = jun,
  year = {2006},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1165255.1165256},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu Oct 5 07:00:29 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {The growing importance of multilingual information
    retrieval and machine translation has made multilingual
    ontologies extremely valuable resources. Since the
    construction of an ontology from scratch is a very
    expensive and time-consuming undertaking, it is
    attractive to consider ways of automatically aligning
    monolingual ontologies, which already exist for many of
    the world's major languages. Previous research
    exploited similarity in the structure of the ontologies
    to align, or manually created bilingual resources.
    These approaches cannot be used to align ontologies
    with vastly different structures and can only be
    applied to much studied language pairs for which
    expensive resources are already available. In this
    paper, we propose a novel approach to align the
    ontologies at the node level: Given a concept
    represented by a particular word sense in one ontology,
    our task is to find the best corresponding word sense
    in the second language ontology. To this end, we
    present a language-independent, corpus-based method
    that borrows from techniques used in information
    retrieval and machine translation. We show its
    efficiency by applying it to two very different
    ontologies in very different languages: the Mandarin
    Chinese HowNet and the American English WordNet.
    Moreover, we propose a methodology to measure bilingual
    corpora comparability and show that our method is
    robust enough to use noisy nonparallel bilingual
    corpora efficiently, when clean parallel corpora are
    not available.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Lee:2006:ABN,
  author = {Chun-Jen Lee and Jason S. Chang and Jyh-Shing R. Jang},
  title = {Alignment of bilingual named entities in parallel corpora using
    statistical models and multiple knowledge sources},
  journal = j-TALIP,
  volume = {5},
  number = {2},
  pages = {121--145},
  month = jun,
  year = {2006},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1165255.1165257},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu Oct 5 07:00:29 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Named entity (NE) extraction is one of the fundamental
    tasks in natural language processing (NLP). Although
    many studies have focused on identifying NEs within
    monolingual documents, aligning NEs in bilingual
    documents has not been investigated extensively due to
    the complexity of the task. In this article we
    introduce a new approach to aligning bilingual NEs in
    parallel corpora by incorporating statistical models
    with multiple knowledge sources. In our approach, we
    model the process of translating an English NE phrase
    into a Chinese equivalent using lexical
    translation\slash transliteration probabilities for
    word translation and alignment probabilities for word
    reordering. The method involves automatically learning
    phrase alignment and acquiring word translations from a
    bilingual phrase dictionary and parallel corpora, and
    automatically discovering transliteration
    transformations from a training set of
    name-transliteration pairs. The method also involves
    language-specific knowledge functions, including
    handling abbreviations, recognizing Chinese personal
    names, and expanding acronyms. At runtime, the proposed
    models are applied to each source NE in a pair of
    bilingual sentences to generate and evaluate the target
    NE candidates; the source and target NEs are then
    aligned based on the computed probabilities.
    Experimental results demonstrate that the proposed
    approach, which integrates statistical models with
    extra knowledge sources, is highly feasible and offers
    significant improvement in performance compared to our
    previous work, as well as the traditional approach of
    IBM Model 4.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Shirado:2006:UJH,
  author = {Tamotsu Shirado and Satoko Marumoto and Masaki Murata and
    Hitoshi Isahara},
  title = {Using {Japanese} honorific expressions: a psychological study},
  journal = j-TALIP,
  volume = {5},
  number = {2},
  pages = {146--164},
  month = jun,
  year = {2006},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1165255.1165258},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu Oct 5 07:00:29 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {We investigated, via experiment, knowledge of
    normative honorific expressions as used in textbooks
    and in practice by people. Forty subjects divided into
    four groups according to age (younger\slash older) and
    gender (male\slash female) participated in the
    experiments. The results show that knowledge about the
    use of normative honorific expressions in textbooks is
    similar to that demonstrated by the younger subject
    groups, but differed from that of the older subject
    groups. The knowledge of the older subjects was more
    complex than that shown in textbooks or demonstrated by
    the younger subjects. A model that can identify misuse
    of honorific expressions in sentences is the framework
    for this investigation. The model is minimal, but could
    represent 76\% to 92\% of the subjects' knowledge
    regarding each honorific element. This model will be
    useful in the development of computer-aided systems to
    help teach how honorific expressions should be used.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Wu:2006:ERT,
  author = {Chung-Hsien Wu and Ze-Jing Chuang and Yu-Chung Lin},
  title = {Emotion recognition from text using semantic labels and
    separable mixture models},
  journal = j-TALIP,
  volume = {5},
  number = {2},
  pages = {165--183},
  month = jun,
  year = {2006},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1165255.1165259},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Thu Oct 5 07:00:29 MDT 2006},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {This study presents a novel approach to automatic
    emotion recognition from text. First, emotion
    generation rules (EGRs) are manually deduced from
    psychology to represent the conditions for generating
    emotion. Based on the EGRs, the emotional state of each
    sentence can be represented as a sequence of semantic
    labels (SLs) and attributes (ATTs); SLs are defined as
    the domain-independent features, while ATTs are
    domain-dependent. The emotion association rules (EARs)
    represented by SLs and ATTs for each emotion are
    automatically derived from the sentences in an
    emotional text corpus using the a priori algorithm.
    Finally, a separable mixture model (SMM) is adopted to
    estimate the similarity between an input sentence and
    the EARs of each emotional state. Since some features
    defined in this approach are domain-dependent, a dialog
    system focusing on the students' daily expressions is
    constructed, and only three emotional states, happy,
    unhappy, and neutral, are considered for performance
    evaluation. According to the results of the
    experiments, given the domain corpus, the proposed
    approach is promising, and easily ported into other
    domains.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Dale:2006:ISS,
  author = {Robert Dale},
  title = {Introduction to the {Special} section: {Extended} best papers
    from {IJCNLP 2005}},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {183--184},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Oh:2006:MTM,
  author = {Jong-Hoon Oh and Key-Sun Choi and Hitoshi Isahara},
  title = {A machine transliteration model based on correspondence between
    graphemes and phonemes},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {185--208},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Gao:2006:ESL,
  author = {Jianfeng Gao and Hisami Suzuki and Wei Yuan},
  title = {An empirical study on language model adaptation},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {209--227},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Ye:2006:SRL,
  author = {Patrick Ye and Timothy Baldwin},
  title = {Semantic role labeling of prepositional phrases},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {228--244},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chung:2006:APD,
  author = {Tze Leung Chung and Robert Wing Pong Luk and Kam Fai Wong and
    Kui Lam Kwok and Dik Lun Lee},
  title = {Adapting pivoted document-length normalization for query size:
    {Experiments} in {Chinese} and {English}},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {245--263},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Matsumura:2006:ERB,
  author = {Atsushi Matsumura and Atsuhiro Takasu and Jun Adachi},
  title = {Effect of relationships between words on {Japanese} information
    retrieval},
  journal = j-TALIP,
  volume = {5},
  number = {3},
  pages = {264--289},
  month = sep,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:36 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Song:2006:ISI,
  author = {Dawei Song and Jian-Yun Nie},
  title = {Introduction to special issue on reasoning in natural language
    information processing},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {291--295},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Nie:2006:ILM,
  author = {Jian-Yun Nie and Guihong Cao and Jing Bai},
  title = {Inferential language models for information retrieval},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {296--322},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Gao:2006:SQT,
  author = {Jianfeng Gao and Jian-Yun Nie and Ming Zhou},
  title = {Statistical query translation models for cross-language
    information retrieval},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {323--359},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Liu:2006:SFQ,
  author = {Yi Liu and Rong Jin and Joyce Y. Chai},
  title = {A statistical framework for query translation disambiguation},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {360--387},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Li:2006:TTT,
  author = {Baoli Li and Wenjie Li and Qin Lu},
  title = {Topic tracking with time granularity reasoning},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {388--412},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Phan:2006:IDS,
  author = {Xuan-Hieu Phan and Le-Minh Nguyen and Yasushi Inoguchi and
    Tu-Bao Ho and Susumu Horiguchi},
  title = {Improving discriminative sequential learning by discovering
    important association of statistics},
  journal = j-TALIP,
  volume = {5},
  number = {4},
  pages = {413--438},
  month = dec,
  year = {2006},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chen:2007:UDM,
  author = {Yong Chen and Kwok-Ping Chan},
  title = {Using data mining techniques and rough set theory for language
    modeling},
  journal = j-TALIP,
  volume = {6},
  number = {1},
  pages = {??--??},
  month = apr,
  year = {2007},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Hsu:2007:MSB,
  author = {Chung-Chian Hsu and Chien-Hsing Chen and Tien-Teng Shih and
    Chun-Kai Chen},
  title = {Measuring similarity between transliterations against noise
    data},
  journal = j-TALIP,
  volume = {6},
  number = {1},
  pages = {??--??},
  month = apr,
  year = {2007},
  CODEN = {????},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  bibdate = {Sat Apr 14 10:21:37 MDT 2007},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Asian Language Information Processing},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

%%% TALIP 6(1), article 3.  The last page is not known, so the page range
%%% uses the articleno-based placeholder form N:1--N:??.
@Article{Sakai:2007:RFQ,
  author =       "Tetsuya Sakai",
  title =        "On the reliability of factoid question answering
                 evaluation",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% TALIP 6(1), article 4.  The last page is not known, so the page range
%%% uses the articleno-based placeholder form N:1--N:??.
@Article{Wiseman:2007:CBC,
  author =       "Yair Wiseman and Irit Gefner",
  title =        "Conjugation-based compression for {Hebrew} texts",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% TALIP 6(1), article 1.  The last page is not known, so the page range
%%% uses the articleno-based placeholder form N:1--N:??.
@Article{Wu:2007:TBS,
  author =       "Chung-Hsien Wu and Hung-Yu Su and Yu-Hsien Chiu and
                 Chia-Hung Lin",
  title =        "Transfer-based statistical translation of {Taiwanese}
                 sign language using {PCFG}",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% TALIP 6(2), article 6.  Single-letter variables in the abstract are
%%% set in math mode ($n$-gram, $k$-neighborhood, $F$-measure).
@Article{Kuo:2007:PSM,
  author =       "Jin-Shea Kuo and Haizhou Li and Ying-Kuei Yang",
  title =        "A phonetic similarity model for automatic extraction
                 of transliteration pairs",
  journal =      j-TALIP,
  volume =       "6",
  number =       "2",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1282080.1282081",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:28 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article proposes an approach for the automatic
                 extraction of transliteration pairs from Chinese Web
                 corpora. In this approach, we formulate the machine
                 transliteration process using a syllable-based phonetic
                 similarity model which consists of phonetic confusion
                 matrices and a Chinese character $n$-gram language
                 model. With the phonetic similarity model, the
                 extraction of transliteration pairs becomes a two-step
                 process of recognition followed by validation: First,
                 in the recognition process, we identify the most
                 probable transliteration in the $k$-neighborhood of a
                 recognized English word. Then, in the validation
                 process, we qualify the transliteration pair candidates
                 with a hypothesis test. We carry out an analytical
                 study on the statistics of several key factors in
                 English--Chinese transliteration to help formulate
                 phonetic similarity modeling. We then conduct both
                 supervised and unsupervised learning of a phonetic
                 similarity model on a development database. The
                 experimental results validate the effectiveness of the
                 phonetic similarity model by achieving an $F$-measure
                 of 0.739 in supervised learning. The unsupervised
                 learning approach works almost as well as the
                 supervised one, thus allowing us to deploy automatic
                 extraction of transliteration pairs in the Web space.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "extraction of transliteration pairs; machine
                 translation; machine transliteration; phonetic
                 confusion probability; phonetic similarity modeling",
}

%%% TALIP 6(2), article 7: nonstationary maximum entropy Markov model
%%% (NS-MEMM) applied to part-of-speech tagging.
@Article{Xiao:2007:SNM,
  author          = {Jinghui Xiao and Xiaolong Wang and Bingquan Liu},
  title           = {The study of a nonstationary maximum entropy {Markov}
                     model and its application on the pos-tagging task},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  volume          = {6},
  number          = {2},
  articleno       = {7},
  pages           = {7:1--7:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  DOI             = {https://doi.org/10.1145/1282080.1282082},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {data sparseness problem; Markov property; MEMM;
                     pos-tagging; stationary hypothesis},
  abstract        = {Sequence labeling is a core task in natural language
                     processing. The maximum entropy Markov model (MEMM) is
                     a powerful tool in performing this task. This article
                     enhances the traditional MEMM by exploiting the
                     positional information of language elements. The
                     stationary hypothesis is relaxed in MEMM, and the
                     nonstationary MEMM (NS-MEMM) is proposed. Several
                     related issues are discussed in detail, including the
                     representation of positional information, NS-MEMM
                     implementation, smoothing techniques, and the space
                     complexity issue. Furthermore, the asymmetric NS-MEMM
                     presents a more flexible way to exploit positional
                     information. In the experiments, NS-MEMM is evaluated
                     on both the Chinese and the English pos-tagging tasks.
                     According to the experimental results, NS-MEMM yields
                     effective improvements over MEMM by exploiting
                     positional information. The smoothing techniques in
                     this article effectively solve the NS-MEMM
                     data-sparseness problem; the asymmetric NS-MEMM is also
                     an improvement by exploiting positional information in
                     a more flexible way.},
  bibdate         = {Mon Jun 16 17:11:28 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

%%% TALIP 6(2), article 8.
%%% NOTE(review): first author's given name entered as `Yi' (the source
%%% record read `Yl'); confirm against the publisher's author list.
@Article{Zhuang:2007:IHD,
  author =       "Yi Zhuang and Yueting Zhuang and Qing Li and Lei
                 Chen",
  title =        "Interactive high-dimensional index for large {Chinese}
                 calligraphic character databases",
  journal =      j-TALIP,
  volume =       "6",
  number =       "2",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1282080.1282083",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:28 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The large numbers of Chinese calligraphic scripts in
                 existence are valuable part of the Chinese cultural
                 heritage. However, due to the shape complexity of these
                 characters, it is hard to employ existing techniques to
                 effectively retrieve and efficiently index them. In
                 this article, using a novel shape-similarity-based
                 retrieval method in which shapes of calligraphic
                 characters are represented by their contour points
                 extracted from the character images, we propose an
                 interactive partial-distance-map (PDM)-based
                 high-dimensional indexing scheme which is designed
                 specifically to speed up the retrieval performance of
                 the large Chinese calligraphic character databases
                 effectively. Specifically, we use the approximate
                 minimal bounding sphere of a query character and
                 utilize users' relevance feedback to refine the query
                 gradually. Comprehensive experiments are conducted to
                 testify the efficiency and effectiveness of this
                 method. In addition, a new $k$-NN search called Pseudo
                 $k$-NN (P $k$-NN) search is presented to better
                 facilitate the PDM-based character retrieval.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Chinese calligraphic character; hyper-centre
                 relocation; Pseudo k-NN",
}

%%% TALIP 6(3), article 9.  Keywords are one term per semicolon-separated
%%% item; the abstract text is kept as published.
@Article{Saraswathi:2007:CPE,
  author =       "S. Saraswathi and T. V. Geetha",
  title =        "Comparison of performance of enhanced morpheme-based
                 language model with different word-based language
                 models for improving the performance of {Tamil} speech
                 recognition system",
  journal =      j-TALIP,
  volume =       "6",
  number =       "3",
  pages =        "9:1--9:??",
  month =        nov,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1290002.1290003",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This paper describes a new technique of language
                 modeling for a highly inflectional Dravidian language,
                 Tamil. It aims to alleviate the main problems
                 encountered in processing of Tamil language, like
                 enormous vocabulary growth caused by the large number
                 of different forms derived from one word. The size of
                 the vocabulary was reduced by, decomposing the words
                 into stems and endings and storing these sub word units
                 (morphemes) in the vocabulary separately. A enhanced
                 morpheme-based language model was designed for the
                 inflectional language Tamil. The enhanced
                 morpheme-based language model was trained on the
                 decomposed corpus. The perplexity and Word Error Rate
                 (WER) were obtained to check the efficiency of the
                 model for Tamil speech recognition system. The results
                 were compared with word-based bigram and trigram
                 language models, distance based language model,
                 dependency based language model and class based
                 language model. From the results it was analyzed that
                 the enhanced morpheme-based trigram model with Katz
                 back-off smoothing effect improved the performance of
                 the Tamil speech recognition system when compared to
                 the word-based language models.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "language model; morphemes; perplexity; speech
                 recognition; word error rate",
}

%%% TALIP 6(3), article 10: developing a collation (lexicographic
%%% sorting) sequence, with Urdu as the worked example.
@Article{Hussain:2007:DLS,
  author          = {Sarmad Hussain and Sana Gul and Afifah Waseem},
  title           = {Developing lexicographic sorting: {An} example for
                     {Urdu}},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  volume          = {6},
  number          = {3},
  articleno       = {10},
  pages           = {10:1--10:??},
  month           = nov,
  year            = {2007},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  DOI             = {https://doi.org/10.1145/1290002.1290004},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {text processing; Urdu},
  abstract        = {Collation or lexicographic sorting is essential to
                     develop multilingual computing. This paper presents the
                     challenges faced in developing collation sequence for a
                     language. The paper discusses both theoretical
                     linguistic and practical standardization and encoding
                     related considerations that need to be addressed for
                     languages for which relevant standards and/or solutions
                     have not been defined. The paper also defines the
                     process, by giving the details of the procedure
                     followed for Urdu language, which is the national
                     language of Pakistan and is spoken by more than 100
                     million people across the world. The paper is oriented
                     towards organizations involved in developing and using
                     collation standards and the localization industry, and
                     not focused on theoretical issues.},
  bibdate         = {Mon Jun 16 17:11:45 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

%%% TALIP 6(3), article 11.
%%% NOTE(review): abstract now reads `$k$-means' (hyphen restored on the
%%% assumption that the clustering algorithm name is meant); confirm
%%% against the published abstract.
@Article{Fukumoto:2007:TTB,
  author =       "Fumiyo Fukumoto and Yoshimi Suzuki",
  title =        "Topic tracking based on bilingual comparable corpora
                 and semisupervised clustering",
  journal =      j-TALIP,
  volume =       "6",
  number =       "3",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1290002.1290005",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "In this paper, we address the problem of skewed data
                 in topic tracking: the small number of stories labeled
                 positive as compared to negative stories and propose a
                 method for estimating effective training stories for
                 the topic-tracking task. For a small number of labeled
                 positive stories, we use bilingual comparable, i.e.,
                 English, and Japanese corpora, together with the EDR
                 bilingual dictionary, and extract story pairs
                 consisting of positive and associated stories. To
                 overcome the problem of a large number of labeled
                 negative stories, we classified them into clusters.
                 This is done using a semisupervised clustering
                 algorithm, combining $k$-means with EM. The method was
                 tested on the TDT English corpus and the results showed
                 that the system works well when the topic under
                 tracking is talking about an event originating in the
                 source language country, even for a small number of
                 initial positive training stories.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "bilingual comparable corpora; clustering; EM
                 algorithm; N-gram model; topic detection and tracking",
}

%%% TALIP 6(4), article 1 (articleno taken from the pages field).
@Article{Iida:2007:ZAR,
  author =       "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
  title =        "Zero-anaphora resolution by learning rich syntactic
                 pattern features",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "1:1--1:22",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1316457.1316458",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "We approach the zero-anaphora resolution problem by
                 decomposing it into intrasentential and intersentential
                 zero-anaphora resolution tasks. For the former task,
                 syntactic patterns of zero-pronouns and their
                 antecedents are useful clues. Taking Japanese as a
                 target language, we empirically demonstrate that
                 incorporating rich syntactic pattern features in a
                 state-of-the-art learning-based anaphora resolution
                 model dramatically improves the accuracy of
                 intrasentential zero-anaphora, which consequently
                 improves the overall performance of zero-anaphora
                 resolution.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

%%% TALIP 6(4), article 2 (articleno taken from the pages field).
%%% NOTE(review): the DOI suffix ...1316459 is inferred from the DOIs of
%%% articles 1 and 3 of this issue (...1316458 and ...1316460); confirm
%%% against the ACM Digital Library.
@Article{Adriani:2007:SIC,
  author =       "Mirna Adriani and Jelita Asian and Bobby Nazief and S.
                 M. M. Tahaghoghi and Hugh E. Williams",
  title =        "Stemming {Indonesian}: a confix-stripping approach",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "2:1--2:33",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1316457.1316459",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Stemming words to (usually) remove suffixes has
                 applications in text search, machine translation,
                 document summarization, and text classification. For
                 example, English stemming reduces the words 'computer,'
                 'computing,' 'computation,' and 'computability' to
                 their common morphological root, 'comput-.' In text
                 search, this permits a search for 'computers' to find
                 documents containing all words with the stem 'comput-.'
                 In the Indonesian language, stemming is of crucial
                 importance: words have prefixes, suffixes, infixes, and
                 confixes that make matching related words
                 difficult.\par

                 This work surveys existing techniques for stemming
                 Indonesian words to their morphological roots, presents
                 our novel and highly accurate CS algorithm, and
                 explores the effectiveness of stemming in the context
                 of general-purpose text information retrieval through
                 ad hoc queries.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Indonesian; information retrieval; stemming",
}

%%% TALIP 6(4), article 3 (articleno taken from the pages field).
%%% Keywords are one term per semicolon-separated item.
@Article{Thao:2007:NER,
  author =       "Pham Thi Xuan Thao and Tran Quoc Tri and Dinh Dien and
                 Nigel Collier",
  title =        "Named entity recognition in {Vietnamese} using
                 classifier voting",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "3:1--3:18",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1316457.1316460",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Named entity recognition (NER) is one of the
                 fundamental tasks in natural-language processing (NLP).
                 Though the combination of different classifiers has
                 been widely applied in several well-studied languages,
                 this is the first time this method has been applied to
                 Vietnamese. In this article, we describe how voting
                 techniques can improve the performance of Vietnamese
                 NER. By combining several state-of-the-art
                 machine-learning algorithms using voting strategies,
                 our final result outperforms individual algorithms and
                 gained an $F$-measure of 89.12. A detailed discussion
                 about the challenges of NER in Vietnamese is also
                 presented.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "C4.5; Conditional Random Fields; Na{\"\i}ve Bayes;
                 named entity recognition; support vector machines;
                 transformation based learning; Vietnamese; voting",
}

%%% TALIP 7(1), article 1: structure-based (chunking plus synchronous
%%% CFG) translation of Chinese organization names.
@Article{Chen:2008:SBM,
  author          = {Yufeng Chen and Chengqing Zong},
  title           = {A Structure-Based Model for {Chinese} Organization
                     Name Translation},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  volume          = {7},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = feb,
  year            = {2008},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  DOI             = {https://doi.org/10.1145/1330291.1330292},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {alignment; chunk; hierarchical derivation; machine
                     translation; named entity; organization name; rules
                     extraction; structural analysis; synchronous
                     context-free grammar},
  abstract        = {Named entity (NE) translation is a fundamental task in
                     multilingual natural language processing. The
                     performance of a machine translation system depends
                     heavily on precise translation of the inclusive NEs.
                     Furthermore, organization name (ON) is the most complex
                     NE for translation among all the NEs. In this article,
                     the structure formulation of ONs is investigated and a
                     hierarchical structure-based ON translation model for
                     Chinese-to-English translation system is
                     presented.\par

                     First, the model performs ON chunking; then both the
                     translation of words within chunks and the process of
                     chunk-reordering are achieved by synchronous
                     context-free grammar (CFG). The CFG rules are extracted
                     from bilingual ON pairs in a training program.\par

                     The main contributions of this article are: (1)
                     defining appropriate chunk-units for analyzing the
                     internal structure of Chinese ONs; (2) making the
                     chunk-based ON translation feasible and flexible via a
                     hierarchical CFG derivation; and (3) proposing a
                     training architecture to automatically learn the
                     synchronous CFG for constructing ONs with chunk-units
                     from aligned bilingual ON pairs. The experiments show
                     that the proposed approach translates the Chinese ONs
                     into English with an accuracy of 93.75\% and
                     significantly improves the performance of a baseline
                     statistical machine translation (SMT) system.},
  bibdate         = {Mon Jun 16 17:12:10 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

%%% TALIP 7(1), article 2: error-corrective reranking for speech
%%% recognition and spoken-language understanding.
@Article{Jeong:2008:ISR,
  author =       "Minwoo Jeong and Gary Geunbae Lee",
  title =        "Improving Speech Recognition and Understanding using
                 Error-Corrective Reranking",
  journal =      j-TALIP,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1330291.1330293",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:12:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The main issues of practical spoken-language
                 applications for human-computer interface are how to
                 overcome speech recognition errors and guarantee the
                 reasonable end-performance of spoken-language
                 applications. Therefore, handling the erroneously
                 recognized outputs is a key in developing robust
                 spoken-language systems. To address this problem, we
                 present a method to improve the accuracy of speech
                 recognition and performance of spoken-language
                 applications. The proposed error corrective reranking
                 approach exploits recognition environment
                 characteristics and domain-specific semantic
                 information to provide robustness and adaptability for
                 a spoken-language system. We demonstrate some
                 experiments of spoken dialogue tasks and empirical
                 results that show an improvement in accuracy for both
                 speech recognition and spoken-language understanding.
                 In our experiment, we show an error reduction of up to
                 9.7\% and 16.8\% of word error rate, and 5.5\% and
                 7.9\% of understanding error for the air travel and
                 telebanking service domains.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "automatic speech recognition; error-corrective
                 reranking; improving spoken dialogue system;
                 spoken-language understanding",
}

%%% TALIP 7(1), article 3: multidocument summarization driven by
%%% informative words, event words, and temporal resolution.
@Article{Kuo:2008:MSG,
  author          = {June-Jei Kuo and Hsin-Hsi Chen},
  title           = {Multidocument Summary Generation: Using Informative
                     and Event Words},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  volume          = {7},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  month           = feb,
  year            = {2008},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  DOI             = {https://doi.org/10.1145/1330291.1330294},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {latent semantic analysis; multidocument summary
                     generation; sentence ordering; sentence reduction;
                     sentence selection; temporal processing},
  abstract        = {Summary generation for multiple documents poses a
                     number of issues including sentence selection, sentence
                     ordering, and sentence reduction over single-document
                     summarization. In addition, the temporal resolution
                     among extracted sentences is also important. This
                     article considers informative words and event words to
                     deal with multidocument summarization. These words
                     indicate the important concepts and relationships in a
                     document or among a set of documents, and can be used
                     to select salient sentences. We present a temporal
                     resolution algorithm, using focusing time and
                     coreference chains, to convert Chinese temporal
                     expressions in a document into calendrical forms.
                     Moreover, we consider the last calendrical form of a
                     sentence as a sentence time stamp to address sentence
                     ordering. Informative words, event words, and temporal
                     words are introduced to a sentence reduction algorithm,
                     which deals with both length constraints and
                     information coverage. Experiments on Chinese-news data
                     sets show significant improvements of both information
                     coverage and readability.},
  bibdate         = {Mon Jun 16 17:12:10 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

%%% TALIP 7(2), article 4: guest editors' introduction to the NTCIR-6
%%% special issue (no abstract or keywords recorded).
@Article{Kando:2008:INS,
  author          = {Noriko Kando and Teruko Mitamura and Tetsuya Sakai},
  title           = {Introduction to the {NTCIR-6 Special Issue}},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  volume          = {7},
  number          = {2},
  articleno       = {4},
  pages           = {4:1--4:??},
  month           = jun,
  year            = {2008},
  CODEN           = {????},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  DOI             = {https://doi.org/10.1145/1362782.1362783},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Mon Jun 16 17:12:23 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhou:2008:HTE,
  author          = {Dong Zhou and Mark Truran and Tim Brailsford and Helen
                     Ashman},
  title           = {A Hybrid Technique for {English--Chinese} Cross
                     Language Information Retrieval},
  journal         = j-TALIP,
  volume          = {7},
  number          = {2},
  pages           = {5:1--5:??},
  month           = jun,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1362782.1362784},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Jun 16 17:12:23 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {In this article we describe a hybrid technique for
                     dictionary-based query translation suitable for
                     English--Chinese cross language information retrieval.
                     This technique marries a graph-based model for the
                     resolution of candidate term ambiguity with a
                     pattern-based method for the translation of
                     out-of-vocabulary (OOV) terms. We evaluate the
                     performance of this hybrid technique in an experiment
                     using several NTCIR test collections. Experimental
                     results indicate a substantial increase in retrieval
                     effectiveness over various baseline systems
                     incorporating machine- and dictionary-based
                     translation.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {cross language information retrieval; disambiguation;
                     graph-based analysis; patterns; unknown term
                     translation},
}

@Article{Higashinaka:2008:AAC,
  author          = {Ryuichiro Higashinaka and Hideki Isozaki},
  title           = {Automatically Acquiring Causal Expression Patterns
                     from Relation-annotated Corpora to Improve Question
                     Answering for why-Questions},
  journal         = j-TALIP,
  volume          = {7},
  number          = {2},
  pages           = {6:1--6:??},
  month           = jun,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1362782.1362785},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Jun 16 17:12:23 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {This article describes our approach for answering
                     why-questions that we initially introduced at NTCIR-6
                     QAC-4. The approach automatically acquires causal
                     expression patterns from relation-annotated corpora by
                     abstracting text spans annotated with a causal relation
                     and by mining syntactic patterns that are useful for
                     distinguishing sentences annotated with a causal
                     relation from those annotated with other relations. We
                     use these automatically acquired causal expression
                     patterns to create features to represent answer
                     candidates, and use these features together with other
                     possible features related to causality to train an
                     answer candidate ranker that maximizes the QA
                     performance with regards to the corpus of why-questions
                     and answers. NAZEQA, a Japanese why-QA system based on
                     our approach, clearly outperforms baselines with a Mean
                     Reciprocal Rank (top-5) of 0.223 when sentences are
                     used as answers and with a MRR (top-5) of 0.326 when
                     paragraphs are used as answers, making it presumably
                     the best-performing fully implemented why-QA system.
                     Experimental results also verified the usefulness of
                     the automatically acquired causal expression
                     patterns.},
  acknowledgement = ack-nhfb,
  articleno       = {6},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {causal expression; pattern mining; question answering;
                     relation-annotated corpus},
}

@Article{Li:2008:ASV,
  author          = {Yaoyong Li and Kalina Bontcheva},
  title           = {Adapting Support Vector Machines for ${F}$-term-based
                     Classification of Patents},
  journal         = j-TALIP,
  volume          = {7},
  number          = {2},
  pages           = {7:1--7:??},
  month           = jun,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1362782.1362786},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Jun 16 17:12:23 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Support Vector Machines (SVM) have obtained
                     state-of-the-art results on many applications including
                     document classification. However, previous works on
                     applying SVMs to the $F$-term patent classification
                     task did not obtain as good results as other learning
                     algorithms such as k-NN. This is due to the fact that
                     $F$-term patent classification is different from
                     conventional document classification in several
                     aspects, mainly because it is a multiclass, multilabel
                     classification problem with semi-structured documents
                     and multi-faceted hierarchical categories.\par

                     This article describes our SVM-based system and several
                     techniques we developed successfully to adapt SVM for
                     the specific features of the $F$-term patent
                     classification task. We evaluate the techniques using
                     the NTCIR-6 $F$-term classification terms assigned to
                     Japanese patents. Moreover, our system participated in
                     the NTCIR-6 patent classification evaluation and
                     obtained the best results according to two of the three
                     metrics used for task performance evaluation. Following
                     the NTCIR-6 participation, we developed two new
                     techniques, which achieved even better scores using all
                     three NTCIR-6 metrics, effectively outperforming all
                     participating systems. This article presents this new
                     work and the experimental results that demonstrate the
                     benefits of the latest approach.},
  acknowledgement = ack-nhfb,
  articleno       = {7},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {F-term classification; patent processing; support
                     vector machines},
}

@Article{Fukumoto:2008:ICL,
  author          = {Fumiyo Fukumoto and Yoshimi Suzuki},
  title           = {Integrating Cross-Language Hierarchies and Its
                     Application to Retrieving Relevant Documents},
  journal         = j-TALIP,
  volume          = {7},
  number          = {3},
  pages           = {8:1--8:??},
  month           = aug,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1386869.1386870},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Aug 22 13:11:51 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Internet directories such as Yahoo! are an approach to
                     improve the efficacy and efficiency of Information
                     Retrieval (IR) on the Web, as pages (documents) are
                     organized into hierarchical categories, and similar
                     pages are grouped together. Most of the search engines
                     on the Web service find documents that are assigned to
                     a single classification hierarchy. Categories in the
                     hierarchy are carefully defined by human experts and
                     documents are well organized. However, a single
                     hierarchy in one language is often insufficient to find
                     all relevant material, as each hierarchy tends to have
                     some bias in both defining hierarchical structure and
                     classifying documents. Moreover, documents written in a
                     language other than the user's native language often
                     include large amounts of information related to the
                     user's request. In this article, we propose a method of
                     integrating cross-language (CL) category hierarchies,
                     that is, Reuters '96 hierarchy and UDC code hierarchy
                     of Japanese by estimating category similarities. The
                     method does not simply merge two different hierarchies
                     into one large hierarchy but instead extracts sets of
                     similar categories, where each element of the sets is
                     relevant with each other. It consists of three steps.
                     First, we classify documents from one hierarchy into
                     categories with another hierarchy using a
                     cross-language text classification (CLTC) technique,
                     and extract category pairs of two hierarchies. Next, we
                     apply $\chi^2$ statistics to these pairs to
                     obtain similar category pairs, and finally we apply the
                     generating function of the Apriori algorithm
                     (Apriori-Gen) to the category pairs, and find sets of
                     similar categories. Moreover, we examined whether
                     integrating hierarchies helps to support retrieval of
                     documents with similar contents. The retrieval results
                     showed a 42.7\% improvement over the baseline
                     nonhierarchy model, and a 21.6\% improvement over a
                     single hierarchy.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {cross-language hierarchies; information integration;
                     retrieval of relevant documents; text classification},
}

@Article{Sharma:2008:AMI,
  author          = {Utpal Sharma and Jugal K. Kalita and Rajib K. Das},
  title           = {Acquisition of Morphology of an {Indic} Language from
                     Text Corpus},
  journal         = j-TALIP,
  volume          = {7},
  number          = {3},
  pages           = {9:1--9:??},
  month           = aug,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1386869.1386871},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Aug 22 13:11:51 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {This article describes an approach to unsupervised
                     learning of morphology from an unannotated corpus for a
                     highly inflectional Indo-European language called
                     Assamese spoken by about 30 million people. Although
                     Assamese is one of India's national languages, it
                     utterly lacks computational linguistic resources. There
                     exists no prior computational work on this language
                     spoken widely in northeast India. The work presented is
                     pioneering in this respect. In this article, we discuss
                     salient issues in Assamese morphology where the
                     presence of a large number of suffixal determiners,
                     sandhi, samas, and the propensity to use suffix
                     sequences make approximately 50\% of the words used in
                     written and spoken text inflected. We implement methods
                     proposed by Gaussier and Goldsmith on acquisition of
                     morphological knowledge, and obtain F-measure
                     performance below 60\%. This motivates us to present a
                     method more suitable for handling suffix sequences,
                     enabling us to increase the F-measure performance of
                     morphology acquisition to almost 70\%. We describe how
                     we build a morphological dictionary for Assamese from
                     the text corpus. Using the morphological knowledge
                     acquired and the morphological dictionary, we are able
                     to process small chunks of data at a time as well as a
                     large corpus. We achieve approximately 85\% precision
                     and recall during the analysis of small chunks of
                     coherent text.},
  acknowledgement = ack-nhfb,
  articleno       = {9},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {Assamese; Indo-European languages; machine learning;
                     morphology},
}

@Article{Chen:2008:TTR,
  author          = {Jiang-Chun Chen and Jyh-Shing Roger Jang},
  title           = {{TRUES}: {Tone Recognition Using Extended Segments}},
  journal         = j-TALIP,
  volume          = {7},
  number          = {3},
  pages           = {10:1--10:??},
  month           = aug,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1386869.1386872},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Aug 22 13:11:51 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Tone recognition has been a basic but important task
                     for speech recognition and assessment of tonal
                     languages, such as Mandarin Chinese. Most previously
                     proposed approaches adopt a two-step approach where
                     syllables within an utterance are identified via forced
                     alignment first, and tone recognition using a variety
                     of classifiers---such as neural networks, Gaussian
                     mixture models (GMM), hidden Markov models (HMM),
                     support vector machines (SVM)---is then performed on
                     each segmented syllable to predict its tone. However,
                     forced alignment does not always generate accurate
                     syllable boundaries, leading to unstable
                     voiced-unvoiced detection and deteriorating performance
                     in tone recognition. Aiming to alleviate this problem,
                     we propose a robust approach called Tone Recognition
                     Using Extended Segments (TRUES) for HMM-based
                     continuous tone recognition. The proposed approach
                     extracts an unbroken pitch contour from a given
                     utterance based on dynamic programming over time-domain
                     acoustic features of average magnitude difference
                     function (AMDF). The pitch contour of each syllable is
                     then extended for tri-tone HMM modeling, such that the
                     influence from inaccurate syllable boundaries is
                     lessened. Our experimental results demonstrate that the
                     proposed TRUES achieves 49.13\% relative error rate
                     reduction over that of the recently proposed supratone
                     modeling, which is deemed the state of the art of tone
                     recognition that outperforms several previously
                     proposed approaches. The encouraging improvement
                     demonstrates the effectiveness and robustness of the
                     proposed TRUES, as well as the corresponding pitch
                     determination algorithm which produces unbroken pitch
                     contours.},
  acknowledgement = ack-nhfb,
  articleno       = {10},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {context-dependent tone modeling; continuous tone
                     recognition; extended segment for tone recognition;
                     HMM; Mandarin Chinese; supratone modeling},
}

@Article{Lin:2008:VCD,
  author          = {Jeng-Wei Lin and Jan-Ming Ho and Li-Ming Tseng and
                     Feipei Lai},
  title           = {Variant {Chinese} Domain Name Resolution},
  journal         = j-TALIP,
  volume          = {7},
  number          = {4},
  pages           = {11:1--11:??},
  month           = nov,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1450295.1450296},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Dec 8 13:56:10 MST 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Many efforts in past years have been made to lower the
                     linguistic barriers for non-native English speakers to
                     access the Internet. Internet standard RFC 3490,
                     referred to as IDNA (Internationalizing Domain Names in
                     Applications), focuses on access to IDNs
                     (Internationalized Domain Names) in a range of scripts
                     that is broader in scope than the original ASCII.
                     However, the use of character variants that have
                     similar appearances and/or interpretations could create
                     confusion. A variant IDL (Internationalized Domain
                     Label), derived from an IDL by replacing some
                     characters with their variants, should match the
                     original IDL; and thus a variant IDN does. In RFC 3743,
                     referred to as JET (Joint Engineering Team) Guidelines,
                     it is suggested that zone administrators model this
                     concept of equivalence as an atomic IDL package. When
                     an IDL is registered, an IDL package is created that
                     contains its variant IDLs generated according to the
                     zone-specific Language Variant Tables (LVTs). In
                     addition to the registered IDL, the name holder can
                     request the domain registry to activate some of the
                     variant IDLs, free or by an extra fee. The activated
                     variant IDLs are stored in the zone files, and thus
                     become resolvable. However, an issue of scalability
                     arises when there is a large number of variant IDLs to
                     be activated.\par

                     In this article, the authors present a resolution
                     protocol that resolves the variant IDLs into the
                     registered IDL, specifically for Han character
                     variants. Two Han characters are said to be variants of
                     each other if they have the same meaning and are
                     pronounced the same. Furthermore, Han character
                     variants usually have similar appearances. It is not
                     uncommon that a Chinese IDL has a large number of
                     variant IDLs. The proposed protocol introduces a new RR
                     (resource record) type, denoted as VarIdx RR, to
                     associate a variant expression of the variant IDLs with
                     the registered IDL. The label of the VarIdx RR, denoted
                     as the variant index, is assigned by an indexing
                     function that is designed to give the same value to all
                     of the variant IDLs enumerated by the variant
                     expression. When one of the variant IDLs is accessed,
                     Internet applications can compute the variant index,
                     look up the VarIdx RRs, and resolve the variant IDL
                     into the registered IDL.\par

                     The authors examine two sets of Chinese IDLs registered
                     in TWNIC and CNNIC, respectively. The results show that
                     for a registered Chinese IDL, a very small number of
                     VarIdx RRs, usually one or two, are sufficient to
                     activate all of its variant IDLs. The authors also
                     represent a Web redirection service that employs the
                     proposed resolution protocol to redirect a URL
                     addressed by a variant IDN to the URL addressed by the
                     registered IDN. The experiment results show that the
                     proposed protocol successfully resolves the variant
                     IDNs into the registered IDNs.},
  acknowledgement = ack-nhfb,
  articleno       = {11},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {conversion between traditional Chinese and simplified
                     Chinese; Han character folding; Han character variant;
                     IDN spoof; internationalized domain name;
                     localization},
}

@Article{Lee:2008:BCQ,
  author          = {Cheng-Wei Lee and Min-Yuh Day and Cheng-Lung Sung and
                     Yi-Hsun Lee and Tian-Jian Jiang and Chia-Wei Wu and
                     Cheng-Wei Shih and Yu-Ren Chen and Wen-Lian Hsu},
  title           = {Boosting {Chinese} Question Answering with Two
                     Lightweight Methods: {ABSPs} and {SCO-QAT}},
  journal         = j-TALIP,
  volume          = {7},
  number          = {4},
  pages           = {12:1--12:??},
  month           = nov,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1450295.1450297},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Dec 8 13:56:10 MST 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Question Answering (QA) research has been conducted in
                     many languages. Nearly all the top performing systems
                     use heavy methods that require sophisticated
                     techniques, such as parsers or logic provers. However,
                     such techniques are usually unavailable or unaffordable
                     for under-resourced languages or in resource-limited
                     situations. In this article, we describe how a
                     top-performing Chinese QA system can be designed by
                     using lightweight methods effectively. We propose two
                     lightweight methods, namely the Sum of Co-occurrences
                     of Question and Answer Terms (SCO-QAT) and
                     Alignment-based Surface Patterns (ABSPs). SCO-QAT is a
                     co-occurrence-based answer-ranking method that does not
                     need extra knowledge, word-ignoring heuristic rules, or
                     tools. It calculates co-occurrence scores based on the
                     passage retrieval results. ABSPs are syntactic patterns
                     trained from question-answer pairs with a multiple
                     alignment algorithm. They are used to capture the
                     relations between terms and then use the relations to
                     filter answers. We attribute the success of the ABSPs
                     and SCO-QAT methods to the effective use of local
                     syntactic information and global co-occurrence
                     information.\par

                     By using SCO-QAT and ABSPs, we improved the RU-Accuracy
                     of our testbed QA system, ASQA, from 0.445 to 0.535 on
                     the NTCIR-5 dataset. It also achieved the top 0.5
                     RU-Accuracy on the NTCIR-6 dataset. The result shows
                     that lightweight methods are not only cheaper to
                     implement, but also have the potential to achieve
                     state-of-the-art performances.},
  acknowledgement = ack-nhfb,
  articleno       = {12},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {answer filtering; answer ranking; Chinese question
                     answering; co-occurrence; lightweight method; surface
                     pattern},
}

@Article{Che:2008:UHC,
  author          = {Wanxiang Che and Min Zhang and AiTi Aw and ChewLim Tan
                     and Ting Liu and Sheng Li},
  title           = {Using a Hybrid Convolution Tree Kernel for Semantic
                     Role Labeling},
  journal         = j-TALIP,
  volume          = {7},
  number          = {4},
  pages           = {13:1--13:??},
  month           = nov,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1450295.1450298},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Dec 8 13:56:10 MST 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {As a kind of Shallow Semantic Parsing, Semantic Role
                     Labeling (SRL) is gaining more attention as it benefits
                     a wide range of natural language processing
                     applications. Given a sentence, the task of SRL is to
                     recognize semantic arguments (roles) for each predicate
                     (target verb or noun). Feature-based methods have
                     achieved much success in SRL and are regarded as the
                     state-of-the-art methods for SRL. However, these
                     methods are less effective in modeling structured
                     features. As an extension of feature-based methods,
                     kernel-based methods are able to capture structured
                     features more efficiently in a much higher dimension.
                     Application of kernel methods to SRL has been achieved
                     by selecting the tree portion of a predicate and one of
                     its arguments as feature space, which is named as
                     predicate-argument feature (PAF) kernel. The PAF kernel
                     captures the syntactic tree structure features using
                     convolution tree kernel, however, it does not
                     distinguish between the path structure and the
                     constituent structure. In this article, a hybrid
                     convolution tree kernel is proposed to model different
                     linguistic objects. The hybrid convolution tree kernel
                     consists of two individual convolution tree kernels.
                     They are a Path kernel, which captures
                     predicate-argument link features, and a Constituent
                     Structure kernel, which captures the syntactic
                     structure features of arguments. Evaluations on the
                     data sets of the CoNLL-2005 SRL shared task and the
                     Chinese PropBank (CPB) show that our proposed hybrid
                     convolution tree kernel statistically significantly
                     outperforms the previous tree kernels. Moreover, in
                     order to maximize the system performance, we present a
                     composite kernel through combining our hybrid
                     convolution tree kernel method with a feature-based
                     method extended by the polynomial kernel. The
                     experimental results show that the composite kernel
                     achieves better performance than each of the individual
                     methods and outperforms the best reported system on the
                     CoNLL-2005 corpus when only one syntactic parser is
                     used and on the CPB corpus when automated syntactic
                     parse results and correct syntactic parse results are
                     used respectively.},
  acknowledgement = ack-nhfb,
  articleno       = {13},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {hybrid convolution tree kernel; semantic role
                     labeling},
}

@Article{Wu:2009:ISI,
  author          = {Chung-Hsien Wu and Haizhou Li},
  title           = {Introduction to the Special Issue on Recent Advances
                     in {Asian} Language Spoken Document Retrieval},
  journal         = j-TALIP,
  volume          = {8},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1482343.1482344},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chen:2009:WTM,
  author          = {Berlin Chen},
  title           = {Word Topic Models for Spoken Document Retrieval and
                     Transcription},
  journal         = j-TALIP,
  volume          = {8},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1482343.1482345},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Statistical language modeling (LM), which aims to
                     capture the regularities in human natural language and
                     quantify the acceptability of a given word sequence,
                     has long been an interesting yet challenging research
                     topic in the speech and language processing community.
                     It also has been introduced to information retrieval
                     (IR) problems, and provided an effective and
                     theoretically attractive probabilistic framework for
                     building IR systems. In this article, we propose a word
                     topic model (WTM) to explore the co-occurrence
                     relationship between words, as well as the long-span
                     latent topical information, for language modeling in
                     spoken document retrieval and transcription. The
                     document or the search history as a whole is modeled as
                     a composite WTM model for generating a newly observed
                     word. The underlying characteristics and different
                     kinds of model structures are extensively investigated,
                     while the performance of WTM is thoroughly analyzed and
                     verified by comparison with the well-known
                     probabilistic latent semantic analysis (PLSA) model as
                     well as the other models. The IR experiments are
                     performed on the TDT Chinese collections (TDT-2 and
                     TDT-3), while the large vocabulary continuous speech
                     recognition (LVCSR) experiments are conducted on the
                     Mandarin broadcast news collected in Taiwan.
                     Experimental results seem to indicate that WTM is a
                     promising alternative to the existing models.},
  acknowledgement = ack-nhfb,
  articleno       = {2},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {adaptation; information retrieval; language model;
                     speech recognition; word topic model},
}

@Article{Lin:2009:CSP,
  author =       "Shih-Hsiang Lin and Berlin Chen and Hsin-Min Wang",
  title =        "A Comparative Study of Probabilistic Ranking Models
                 for {Chinese} Spoken Document Summarization",
  journal =      j-TALIP,
  volume =       "8",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482343.1482346",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 23 16:32:22 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Extractive document summarization automatically
                 selects a number of indicative sentences, passages, or
                 paragraphs from an original document according to a
                 target summarization ratio, and sequences them to form
                 a concise summary. In this article, we present a
                 comparative study of various probabilistic ranking
                 models for spoken document summarization, including
                 supervised classification-based summarizers and
                 unsupervised probabilistic generative summarizers. We
                 also investigate the use of unsupervised summarizers to
                 improve the performance of supervised summarizers when
                 manual labels are not available for training the
                 latter. A novel training data selection approach that
                 leverages the relevance information of spoken sentences
                 to select reliable document-summary pairs derived by
                 the probabilistic generative summarizers is explored
                 for training the classification-based summarizers.
                 Encouraging initial results on Mandarin Chinese
                 broadcast news data are demonstrated.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "extractive summarization; probabilistic ranking
                 models; relevance information; spoken document
                 summarization",
}

@Article{Chen:2009:TSH,
  author =       "Boxing Chen and Min Zhang and Ai Ti Aw",
  title =        "Two-Stage Hypotheses Generation for Spoken Language
                 Translation",
  journal =      j-TALIP,
  volume =       "8",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1482343.1482347",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 23 16:32:22 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Spoken Language Translation (SLT) is the research area
                 that focuses on the translation of speech or text
                 between two spoken languages. Phrase-based and
                 syntax-based methods represent the state-of-the-art for
                 statistical machine translation (SMT). The phrase-based
                 method specializes in modeling local reorderings and
                 translations of multiword expressions. The syntax-based
                 method is enhanced by using syntactic knowledge, which
                 can better model long word reorderings, discontinuous
                 phrases, and syntactic structure. In this article, we
                 leverage on the strength of these two methods and
                 propose a strategy based on multiple hypotheses
                 generation in a two-stage framework for spoken language
                 translation. The hypotheses are generated in two
                 stages, namely, decoding and regeneration. In the
                 decoding stage, we apply state-of-the-art,
                 phrase-based, and syntax-based methods to generate
                 basic translation hypotheses. Then in the regeneration
                 stage, much more hypotheses that cannot be captured by
                 the decoding algorithms are produced from the basic
                 hypotheses. We study three regeneration methods:
                 redecoding, n-gram expansion, and confusion network in
                 the second stage. Finally, an additional reranking pass
                 is introduced to select the translation outputs by a
                 linear combination of rescoring models. Experimental
                 results on the Chinese-to-English IWSLT-2006 challenge
                 task of translating the transcription of spontaneous
                 speech show that the proposed mechanism achieves
                 significant improvements over the baseline of about
                 2.80 BLEU-score.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "hypotheses generation; spoken language translation;
                 statistical machine translation",
}

@Article{Chiang:2009:ISI,
  author =       "David Chiang and Philipp Koehn",
  title =        "Introduction to the Special Issue on Machine
                 Translation of {Asian} Language",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526253",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{He:2009:IMH,
  author =       "Xiaodong He and Mei Yang and Jianfeng Gao and Patrick
                 Nguyen and Robert Moore",
  title =        "Improved Monolingual Hypothesis Alignment for Machine
                 Translation System Combination",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526254",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article presents a new hypothesis alignment
                 method for combining outputs of multiple machine
                 translation (MT) systems. An indirect hidden Markov
                 model (IHMM) is proposed to address the synonym
                 matching and word ordering issues in hypothesis
                 alignment. Unlike traditional HMMs whose parameters are
                 trained via maximum likelihood estimation (MLE), the
                 parameters of the IHMM are estimated indirectly from a
                 variety of sources including word semantic similarity,
                 word surface similarity, and a distance-based
                 distortion penalty. The IHMM-based method significantly
                 outperforms the state-of-the-art, TER-based alignment
                 model in our experiments on NIST benchmark datasets.
                 Our combined SMT system using the proposed method
                 achieved the best Chinese-to-English translation result
                 in the constrained training track of the 2008 NIST Open
                 MT Evaluation.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "hidden Markov model; statistical machine translation;
                 system combination; word alignment",
}

@Article{Ma:2009:BMW,
  author =       "Yanjun Ma and Andy Way",
  title =        "Bilingually Motivated Word Segmentation for
                 Statistical Machine Translation",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526255",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "We introduce a bilingually motivated word segmentation
                 approach to languages where word boundaries are not
                 orthographically marked, with application to
                 Phrase-Based Statistical Machine Translation (PB-SMT).
                 Our approach is motivated from the insight that PB-SMT
                 systems can be improved by optimizing the input
                 representation to reduce the predictive power of
                 translation models. We firstly present an approach to
                 optimize the existing segmentation of both source and
                 target languages for PB-SMT and demonstrate the
                 effectiveness of this approach using a Chinese--English
                 MT task, that is, to measure the influence of the
                 segmentation on the performance of PB-SMT systems. We
                 report a 5.44\% relative increase in Bleu score and a
                 consistent increase according to other metrics. We then
                 generalize this method for Chinese word segmentation
                 without relying on any segmenters and show that using
                 our segmentation PB-SMT can achieve more consistent
                 state-of-the-art performance across two domains. There
                 are two main advantages of our approach. First of all,
                 it is adapted to the specific translation task at hand
                 by taking the corresponding source (target) language
                 into account. Second, this approach does not rely on
                 manually segmented training data so that it can be
                 automatically adapted for different domains.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "alignment; bilingually motivated; phrase-based
                 statistical machine translation; word segmentation",
}

@Article{Venkatapathy:2009:DMT,
  author =       "Sriram Venkatapathy and Srinivas Bangalore",
  title =        "Discriminative Machine Translation Using Global
                 Lexical Selection",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526256",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Statistical phrase-based machine translation models
                 crucially rely on word alignments. The search for
                 word-alignments assumes a model of word locality
                 between source and target languages that is violated in
                 starkly different word-order languages such as
                 English--Hindi. In this article, we present models that
                 decouple the steps of lexical selection and lexical
                 reordering with the aim of minimizing the role of
                 word-alignment in machine translation. Indian languages
                 are morphologically rich and have relatively free-word
                 order where the grammatical role of content words is
                 largely determined by their case markers and not just
                 by their positions in the sentence. Hence, lexical
                 selection plays a far greater role than lexical
                 reordering. For lexical selection, we investigate
                 models that take the entire source sentence into
                 account and evaluate their performance for
                 English--Hindi translation in a tourism domain.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "global lexical selection; machine translation",
}

@Article{Tsunakawa:2009:CJL,
  author =       "Takashi Tsunakawa and Naoaki Okazaki and Xiao Liu and
                 Jun'ichi Tsujii",
  title =        "A {Chinese--Japanese} Lexical Machine Translation
                 through a Pivot Language",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526257",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The bilingual lexicon is an expensive but critical
                 resource for multilingual applications in natural
                 language processing. This article proposes an
                 integrated framework for building a bilingual lexicon
                 between the Chinese and Japanese languages. Since the
                 language pair Chinese--Japanese does not include
                 English, which is a central language of the world, few
                 large-scale bilingual resources between Chinese and
                 Japanese have been constructed. One solution to
                 alleviate this problem is to build a Chinese--Japanese
                 bilingual lexicon through English as the pivot
                 language. In addition to the pivotal approach, we can
                 make use of the characteristics of Chinese and Japanese
                 languages that use Han characters. We incorporate a
                 translation model obtained from a small
                 Chinese--Japanese lexicon and use the similarity of the
                 hanzi and kanji characters by using the log-linear
                 model. Our experimental results show that the use of
                 the pivotal approach can improve the translation
                 performance over the translation model built from a
                 small Chinese--Japanese lexicon. The results also
                 demonstrate that the similarity between the hanzi and
                 kanji characters provides a positive effect for
                 translating technical terms.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "bilingual lexicon; Han characters; hanzi; kanji; pivot
                 language; statistical machine translation",
}

@Article{Chen:2009:USD,
  author =       "Wenliang Chen and Daisuke Kawahara and Kiyotaka
                 Uchimoto and Yujie Zhang and Hitoshi Isahara",
  title =        "Using Short Dependency Relations from Auto-Parsed Data
                 for {Chinese} Dependency Parsing",
  journal =      j-TALIP,
  volume =       "8",
  number =       "3",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1568292.1568293",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:08 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Dependency parsing has become increasingly popular for
                 a surge of interest lately for applications such as
                 machine translation and question answering. Currently,
                 several supervised learning methods can be used for
                 training high-performance dependency parsers if
                 sufficient labeled data are available.\par

                 However, currently used statistical dependency parsers
                 provide poor results for words separated by long
                 distances. In order to solve this problem, this article
                 presents an effective dependency parsing approach of
                 incorporating short dependency information from
                 unlabeled data. The unlabeled data is automatically
                 parsed by using a deterministic dependency parser,
                 which exhibits a relatively high performance for short
                 dependencies between words. We then train another
                 parser that uses the information on short dependency
                 relations extracted from the output of the first
                 parser. The proposed approach achieves an unlabeled
                 attachment score of 86.52\%, an absolute 1.24\%
                 improvement over the baseline system on the Chinese
                 Treebank data set. The results indicate that the
                 proposed approach improves the parsing performance for
                 longer distance words.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Chinese dependency parsing; semi-supervised learning;
                 unlabeled data",
}

@Article{Chanda:2009:WWT,
  author =       "Sukalpa Chanda and Umapada Pal and
                 Oriol {Ramos Terrades}",
  title =        "Word-Wise {Thai} and {Roman} Script Identification",
  journal =      j-TALIP,
  volume =       "8",
  number =       "3",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1568292.1568294",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:08 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "In some Thai documents, a single text line of a
                 printed document page may contain words of both Thai
                 and Roman scripts. For the Optical Character
                 Recognition (OCR) of such a document page it is better
                 to identify, at first, Thai and Roman script portions
                 and then to use individual OCR systems of the
                 respective scripts on these identified portions. In
                 this article, an SVM-based method is proposed for
                 identification of word-wise printed Roman and Thai
                 scripts from a single line of a document page. Here, at
                 first, the document is segmented into lines and then
                 lines are segmented into character groups (words). In
                 the proposed scheme, we identify the script of a
                 character group combining different character features
                 obtained from structural shape, profile behavior,
                 component overlapping information, topological
                 properties, and water reservoir concept, etc. Based on
                 the experiment on 10,000 data (words) we obtained
                 99.62\% script identification accuracy from the
                 proposed scheme.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Multi-script OCR; script identification; SVM; Thai
                 Script",
}

@Article{Nguyen:2009:WSC,
  author =       "Cam-Tu Nguyen and Xuan-Hieu Phan and Susumu Horiguchi
                 and Thu-Trang Nguyen and Quang-Thuy Ha",
  title =        "{Web} Search Clustering and Labeling with Hidden
                 Topics",
  journal =      j-TALIP,
  volume =       "8",
  number =       "3",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1568292.1568295",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:08 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Web search clustering is a solution to reorganize
                 search results (also called ``snippets'') in a more
                 convenient way for browsing. There are three key
                 requirements for such post-retrieval clustering
                 systems: (1) the clustering algorithm should group
                 similar documents together; (2) clusters should be
                 labeled with descriptive phrases; and (3) the
                 clustering system should provide high-quality
                 clustering without downloading the whole Web
                 page.\par

                 This article introduces a novel framework for
                 clustering Web search results in Vietnamese which
                 targets the three above issues. The main motivation is
                 that by enriching short snippets with hidden topics
                 from huge resources of documents on the Internet, it is
                 able to cluster and label such snippets effectively in
                 a topic-oriented manner without concerning whole Web
                 pages. Our approach is based on recent successful topic
                 analysis models, such as Probabilistic-Latent Semantic
                 Analysis, or Latent Dirichlet Allocation. The
                 underlying idea of the framework is that we collect a
                 very large external data collection called ``universal
                 dataset,'' and then build a clustering system on both
                 the original snippets and a rich set of hidden topics
                 discovered from the universal data collection. This can
                 be seen as a richer representation of snippets to be
                 clustered. We carry out careful evaluation of our
                 method and show that our method can yield impressive
                 clustering quality.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "cluster labeling; collocation; hidden topics analysis;
                 Hierarchical Agglomerative Clustering; Latent Dirichlet
                 allocation; Vietnamese; Web search clustering",
}

@Article{Shaalan:2009:ISI,
  author =       "Khaled Shaalan and Ali Farghaly",
  title =        "Introduction to the Special Issue on {Arabic} Natural
                 Language Processing",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644880",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Farghaly:2009:ANL,
  author =       "Ali Farghaly and Khaled Shaalan",
  title =        "{Arabic} Natural Language Processing: Challenges and
                 Solutions",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644881",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The Arabic language presents researchers and
                 developers of natural language processing (NLP)
                 applications for Arabic text and speech with serious
                 challenges. The purpose of this article is to describe
                 some of these challenges and to present some solutions
                 that would guide current and future practitioners in
                 the field of Arabic natural language processing (ANLP).
                 We begin with general features of the Arabic language
                 in Sections 1, 2, and 3 and then we move to more
                 specific properties of the language in the rest of the
                 article. In Section 1 of this article we highlight the
                 significance of the Arabic language today and describe
                 its general properties. Section 2 presents the feature
                 of Arabic Diglossia showing how the sociolinguistic
                 aspects of the Arabic language differ from other
                 languages. The stability of Arabic Diglossia and its
                 implications for ANLP applications are discussed and
                 ways to deal with this problematic property are
                 proposed. Section 3 deals with the properties of the
                 Arabic script and the explosion of ambiguity that
                 results from the absence of short vowel representations
                 and overt case markers in contemporary Arabic texts. We
                 present in Section 4 specific features of the Arabic
                 language such as the nonconcatenative property of
                 Arabic morphology, Arabic as an agglutinative language,
                 Arabic as a pro-drop language, and the challenge these
                 properties pose to ANLP. We also present solutions that
                 have already been adopted by some pioneering
                 researchers in the field. In Section 5 we point out to
                 the lack of formal and explicit grammars of Modern
                 Standard Arabic which impedes the progress of more
                 advanced ANLP systems. In Section 6 we draw our
                 conclusion.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic dialects; Arabic script; Modern Standard
                 Arabic",
}

@Article{Espana-Bonet:2009:DPB,
  author =       "Cristina Espa{\~n}a-Bonet and Jes{\'u}s Gim{\'e}nez
                 and Llu{\'\i}s M{\`a}rquez",
  title =        "Discriminative Phrase-Based Models for {Arabic}
                 Machine Translation",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644882",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "A design for an Arabic-to-English translation system
                 is presented. The core of the system implements a
                 standard phrase-based statistical machine translation
                 architecture, but it is extended by incorporating a
                 local discriminative phrase selection model to address
                 the semantic ambiguity of Arabic. Local classifiers are
                 trained using linguistic information and context to
                 translate a phrase, and this significantly increases
                 the accuracy in phrase selection with respect to the
                 most frequent translation traditionally considered.
                 These classifiers are integrated into the translation
                 system so that the global task gets benefits from the
                 discriminative learning. As a result, we obtain
                 significant improvements in the full translation task
                 at the lexical, syntactic, and semantic levels as
                 measured by an heterogeneous set of automatic
                 evaluation metrics.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic; discriminative learning; English; statistical
                 machine translation",
}

@Article{Benajiba:2009:MBS,
  author =       "Yassine Benajiba and Imed Zitouni",
  title =        "Morphology-Based Segmentation Combination for {Arabic}
                 Mention Detection",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644883",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The Arabic language has a very rich/complex
                 morphology. Each Arabic word is composed of zero or
                 more {\em prefixes}, one {\em stem\/} and zero or more
                 {\em suffixes}. Consequently, the Arabic data is sparse
                 compared to other languages such as English, and it is
                 necessary to conduct word segmentation before any
                 natural language processing task. Therefore, the
                 word-segmentation step is worth a deeper study since it
                 is a preprocessing step which shall have a significant
                 impact on all the steps coming afterward. In this
                 article, we present an Arabic mention detection system
                 that has very competitive results in the recent
                 Automatic Content Extraction (ACE) evaluation campaign.
                 We investigate the impact of different segmentation
                 schemes on Arabic mention detection systems and we show
                 how these systems may benefit from more than one
                 segmentation scheme. We report the performance of
                 several mention detection models using different kinds
                 of possible and known segmentation schemes for Arabic
                 text: punctuation separation, Arabic Treebank, and
                 morphological and character-level segmentations. We
                 show that the combination of competitive segmentation
                 styles leads to a better performance. Results indicate
                 a statistically significant improvement when Arabic
                 Treebank and morphological segmentations are
                 combined.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic information extraction; Arabic mention
                 detection; Arabic segmentation",
}

@Article{Zitouni:2009:CLI,
  author =       "Imed Zitouni and Radu Florian",
  title =        "Cross-Language Information Propagation for {Arabic}
                 Mention Detection",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644884",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "In the last two decades, significant effort has been
                 put into annotating linguistic resources in several
                 languages. Despite this valiant effort, there are still
                 many languages left that have only small amounts of
                 such resources. The goal of this article is to present
                 and investigate a method of propagating information
                 (specifically mention detection) from a resource-rich
                 language into a relatively resource-poor language such
                 as Arabic. Part of the investigation is to quantify the
                 contribution of propagating information in different
                 conditions based on the availability of resources in
                 the target language. Experiments on the language pair
                 Arabic--English show that one can achieve relatively
                 decent performance by propagating information from a
                 language with richer resources such as English into
                 Arabic alone (no resources or models in the source
                 language Arabic). Furthermore, results show that
                 propagated features from English do help improve the
                 Arabic system performance even when used in conjunction
                 with all feature types built from the source language.
                 Experiments also show that using propagated features in
                 conjunction with lexically derived features only (as
                 can be obtained directly from a mention annotated
                 corpus) brings the system performance at the one
                 obtained in the target language by using feature
                 derived from many linguistic resources, therefore
                 improving the system when such resources are not
                 available. In addition to Arabic--English language
                 pair, we investigate the effectiveness of our approach
                 on other language pairs such as Chinese--English and
                 Spanish--English.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic information extraction; Arabic mention
                 detection",
}

@Article{Lamel:2009:AST,
  author =       "Lori Lamel and Abdelkhalek Messaoudi and Jean-Luc
                 Gauvain",
  title =        "Automatic Speech-to-Text Transcription in {Arabic}",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644885",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The Arabic language presents a number of challenges
                 for speech recognition, arising in part from the
                 significant differences in the spoken and written
                 forms, in particular the conventional form of texts
                 being non-vowelized. Being a highly inflected language,
                 the Arabic language has a very large lexical variety
                 and typically with several possible (generally
                 semantically linked) vowelizations for each written
                 form. This article summarizes research carried out over
                 the last few years on speech-to-text transcription of
                 broadcast data in Arabic. The initial research was
                 oriented toward processing of broadcast news data in
                 Modern Standard Arabic, and has since been extended to
                 address a larger variety of broadcast data, which as a
                 consequence results in the need to also be able to
                 handle dialectal speech. While standard techniques in
                 speech recognition have been shown to apply well to the
                 Arabic language, taking into account language
                 specificities help to significantly improve system
                 performance.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic language processing; automatic speech
                 recognition; morphological decomposition; speech
                 processing; speech-to-text transcription",
}

@Article{Moisl:2009:SLL,
  author =       "Hermann Moisl",
  title =        "Sura Length and Lexical Probability Estimation in
                 Cluster Analysis of the {Qur'an}",
  journal =      j-TALIP,
  volume =       "8",
  number =       "4",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644879.1644886",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:17 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Thabet [2005] applied cluster analysis to the Qur'an
                 in the hope of generating a classification of the {\em
                 suras\/} that is useful for understanding of its
                 thematic structure. The result was positive, but
                 variation in {\em sura\/} length was a problem because
                 clustering of the shorter {\em suras\/} was found to be
                 unreliable. The present discussion addresses this
                 problem in four parts. The first part summarizes
                 Thabet's work. The second part argues that unreliable
                 clustering of the shorter {\em suras\/} is a
                 consequence of poor estimation of lexical population
                 probabilities in those {\em suras}. The third part
                 proposes a solution to the problem based on calculation
                 of a minimum {\em sura\/} length threshold using
                 concepts from statistical sampling theory followed by
                 selection of {\em suras\/} and lexical variables based
                 on that threshold. The fourth part applies the proposed
                 solution to a reanalysis of the Qur'an.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Arabic natural language processing; cluster analysis;
                 document length normalization; lexical probability
                 estimation; Qur'an sampling",
}

@article{Hsu:2010:MST,
  author          = {Hsu, Chung-Chian and Chen, Chien-Hsing},
  title           = {Mining Synonymous Transliterations from the {World
                     Wide Web}},
  journal         = j-TALIP,
  volume          = {9},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1731035.1731036},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Mon Mar 29 15:34:01 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {The World Wide Web has been considered one of the
                     important sources for information. Using search engines
                     to retrieve Web pages can gather lots of information,
                     including foreign information. However, to be better
                     understood by local readers, proper names in a foreign
                     language, such as English, are often transliterated to
                     a local language such as Chinese. Due to different
                     translators and the lack of translation standard,
                     translating foreign proper nouns may result in
                     different transliterations and pose a notorious
                     headache. In particular, it may cause incomplete search
                     results. Using one transliteration as a query keyword
                     will fail to retrieve the Web pages which use a
                     different word as the transliteration. Consequently,
                     important information may be missed. We present a
                     framework for mining synonymous transliterations as
                     many as possible from the Web for a given
                     transliteration. The results can be used to construct a
                     database of synonymous transliterations which can be
                     utilized for query expansion so as to alleviate the
                     incomplete search problem. Experimental results show
                     that the proposed framework can effectively retrieve
                     the set of snippets which may contain synonymous
                     transliterations and then extract the target terms.
                     Most of the extracted synonymous transliterations have
                     higher rank of similarity to the input transliteration
                     compared to other noise terms.},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {Chinese transliteration; cross-lingual information
                     retrieval; synonymous transliteration; text mining; Web
                     mining},
}

@article{Liu:2010:ISS,
  author          = {Liu, Feifan and Liu, Yang},
  title           = {Identification of Soundbite and Its Speaker Name Using
                     Transcripts of Broadcast News Speech},
  journal         = j-TALIP,
  volume          = {9},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1731035.1731037},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Mon Mar 29 15:34:01 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {This article presents a pipeline framework for
                     identifying soundbite and its speaker name from
                     Mandarin broadcast news transcripts. Both of the two
                     modules, soundbite segment detection and soundbite
                     speaker name recognition, are based on a supervised
                     classification approach using multiple linguistic
                     features. We systematically evaluated performance for
                     each module as well as the entire system, and
                     investigated the effect of using speech recognition
                     (ASR) output and automatic sentence segmentation. We
                     found that both of the two components impact the
                     pipeline system, with more degradation in the entire
                     system performance due to automatic speaker name
                     recognition errors than soundbite segment detection. In
                     addition, our experimental results show that using ASR
                     output degrades the system performance significantly,
                     and that using automatic sentence segmentation greatly
                     impacts soundbite detection, but has much less effect
                     on speaker name recognition.},
  acknowledgement = ack-nhfb,
  articleno       = {2},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {automatic speech recognition; sentence segmentation;
                     Soundbite detection; speaker name recognition},
}

@Article{Tepper:2010:IMU,
  author =       "Michael Tepper and Fei Xia",
  title =        "Inducing Morphemes Using Light Knowledge",
  journal =      j-TALIP,
  volume =       "9",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1731035.1731038",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:34:01 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Allomorphic variation, or form variation among morphs
                 with the same meaning, is a stumbling block to
                 morphological induction (MI). To address this problem,
                 we present a hybrid approach that uses a small amount
                 of linguistic knowledge in the form of orthographic
                 rewrite rules to help refine an existing MI-produced
                 segmentation. Using rules, we derive underlying
                 analyses of morphs---generalized with respect to
                 contextual spelling differences---from an existing
                 surface morph segmentation, and from these we learn a
                 morpheme-level segmentation. To learn morphemes, we
                 have extended the Morfessor segmentation algorithm
                 [Creutz and Lagus 2004; 2005; 2006] by using rules to
                 infer possible underlying analyses from surface
                 segmentations. A segmentation produced by Morfessor
                 Categories-MAP Software v. 0.9.2 is used as input to
                 our procedure and as a baseline that we evaluate
                 against. To suggest analyses for our procedure, a set
                 of language-specific orthographic rules is needed. Our
                 procedure has yielded promising improvements for
                 English and Turkish over the baseline approach when
                 tested on the Morpho Challenge 2005 and 2007 style
                 evaluations. On the Morpho Challenge 2007 test
                 evaluation, we report gains over the current best
                 unsupervised contestant for Turkish, where our
                 technique shows a 2.5\% absolute {\em F\/}-score
                 improvement.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "allomorphy; computational linguistics; machine
                 learning; Morphological induction",
}

@article{Baldwin:2010:RMB,
  author          = {Baldwin, Timothy and Kim, Sunam and Bond, Francis and
                     Fujita, Sanae and Martinez, David and Tanaka, Takaaki},
  title           = {A Reexamination of {MRD}-Based Word Sense
                     Disambiguation},
  journal         = j-TALIP,
  volume          = {9},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1731035.1731039},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Mon Mar 29 15:34:01 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {This article reconsiders the task of MRD-based word
                     sense disambiguation, in extending the basic Lesk
                     algorithm to investigate the impact on WSD performance
                     of different tokenization schemes and methods of
                     definition extension. In experimentation over the
                     Hinoki Sensebank and the Japanese Senseval-2 dictionary
                     task, we demonstrate that sense-sensitive definition
                     extension over hyponyms, hypernyms, and synonyms,
                     combined with definition extension and word
                     tokenization leads to WSD accuracy above both
                     unsupervised and supervised baselines. In doing so, we
                     demonstrate the utility of ontology induction and
                     establish new opportunities for the development of
                     baseline unsupervised WSD methods.},
  acknowledgement = ack-nhfb,
  articleno       = {4},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {Japanese; machine-readable dictionary; word sense
                     disambiguation},
}

@article{Zhao:2010:UCB,
  author          = {Zhao, Hai and Huang, Chang-Ning and Li, Mu and Lu,
                     Bao-Liang},
  title           = {A Unified Character-Based Tagging Framework for
                     {Chinese} Word Segmentation},
  journal         = j-TALIP,
  volume          = {9},
  number          = {2},
  pages           = {5:1--5:??},
  month           = jun,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1781134.1781135},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Mon Jun 21 18:03:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Chinese word segmentation is an active area in Chinese
                     language processing though it is suffering from the
                     argument about what precisely is a word in Chinese.
                     Based on corpus-based segmentation standard, we
                     launched this study. In detail, we regard Chinese word
                     segmentation as a character-based tagging problem. We
                     show that there has been a potent trend of using a
                     character-based tagging approach in this field. In
                     particular, learning from segmented corpus with or
                     without additional linguistic resources is treated in a
                     unified way in which the only difference depends on how
                     the feature template set is selected. It differs from
                     existing work in that both feature template selection
                     and tag set selection are considered in our approach,
                     instead of the previous feature template focus only
                     technique. We show that there is a significant
                     performance difference as different tag sets are
                     selected. This is especially applied to a six-tag set,
                     which is good enough for most current segmented
                     corpora. The linguistic meaning of a tag set is also
                     discussed. Our results show that a simple learning
                     system with six $n$-gram feature templates and a
                     six-tag set can obtain competitive performance in the
                     cases of learning only from a training corpus. In cases
                     when additional linguistic resources are available, an
                     ensemble learning technique, assistant segmenter, is
                     proposed and its effectiveness is verified. Assistant
                     segmenter is also proven to be an effective method as
                     segmentation standard adaptation that outperforms
                     existing ones. Based on the proposed approach, our
                     system provides state-of-the-art performance in all 12
                     corpora of three international Chinese word
                     segmentation bakeoffs.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {assistant segmenter; character-based tagging method;
                     Chinese word segmentation; conditional random field;
                     tag set selection},
}

@Article{Guo:2010:LIS,
  author =       "Yuqing Guo and Haifeng Wang and Josef van Genabith",
  title =        "A Linguistically Inspired Statistical Model for
                 {Chinese} Punctuation Generation",
  journal =      j-TALIP,
  volume =       "9",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1781134.1781136",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 21 18:03:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article investigates a relatively underdeveloped
                 subject in natural language processing---the generation
                 of punctuation marks. From a theoretical perspective,
                 we study 16 Chinese punctuation marks as defined in the
                 Chinese national standard of punctuation usage, and
                 categorize these punctuation marks into three different
                 types according to their syntactic properties. We
                 implement a three-tier maximum entropy model
                 incorporating linguistically-motivated features for
                 generating the commonly used Chinese punctuation marks
                 in unpunctuated sentences output by a surface realizer.
                 Furthermore, we present a method to automatically
                 extract cue words indicating sentence-final punctuation
                 marks as a specialized feature to construct a more
                 precise model. Evaluating on the Penn Chinese Treebank
                 data, the MaxEnt model achieves an {\em f\/}-score of
                 79.83\% for punctuation insertion and 74.61\% for
                 punctuation restoration using gold data input, 79.50\%
                 for insertion and 73.32\% for restoration using
                 parser-based imperfect input. The experiments show that
                 the MaxEnt model significantly outperforms a baseline
                 5-gram language model that scores 54.99\% for
                 punctuation insertion and 52.01\% for restoration. We
                 show that our results are not far from human
                 performance on the same task with human insertion {\em
                 f\/}-scores in the range of 81--87\% and human
                 restoration in the range of 71--82\%. Finally, a manual
                 error analysis of the generation output shows that
                 close to 40\% of the mismatched punctuation marks do in
                 fact result in acceptable choices, a fact obscured in
                 the automatic string-matching based evaluation
                 scores.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Chinese punctuation marks; maximum entropy model;
                 sentence realization",
}

@Article{Naptali:2010:TDL,
  author =       "Welly Naptali and Masatoshi Tsuchiya and Seiichi
                 Nakagawa",
  title =        "Topic-Dependent Language Model with Voting on Noun
                 History",
  journal =      j-TALIP,
  volume =       "9",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1781134.1781137",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 21 18:03:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Language models (LMs) are an important field of study
                 in automatic speech recognition (ASR) systems. LM helps
                 acoustic models find the corresponding word sequence of
                 a given speech signal. Without it, ASR systems would
                 not understand the language and it would be hard to
                 find the correct word sequence. During the past few
                 years, researchers have tried to incorporate long-range
                 dependencies into statistical word-based $n$-gram LMs.
                 One of these long-range dependencies is topic. Unlike
                 words, topic is unobservable. Thus, it is required to
                 find the meanings behind the words to get into the
                 topic. This research is based on the belief that nouns
                 contain topic information. We propose a new approach
                 for a topic-dependent LM, where the topic is decided in
                 an unsupervised manner. Latent Semantic Analysis (LSA)
                 is employed to reveal hidden (latent) relations among
                 nouns in the context words. To decide the topic of an
                 event, a fixed size word history sequence (window) is
                 observed, and voting is then carried out based on noun
                 class occurrences weighted by a confidence measure.
                 Experiments were conducted on an English corpus and a
                 Japanese corpus: {\em The Wall Street Journal\/} corpus
                 and {\em Mainichi Shimbun\/} (Japanese newspaper)
                 corpus. The results show that our proposed method gives
                 better perplexity than the comparative baselines,
                 including a word-based/class-based $n$-gram LM, their
                 interpolated LM, a cache-based LM, a topic-dependent LM
                 based on $n$-gram, and a topic-dependent LM based on
                 Latent Dirichlet Allocation (LDA). The $n$-best list
                 rescoring was conducted to validate its application in
                 ASR systems.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "Language model; latent semantic analysis; perplexity;
                 speech recognition; topic dependent",
}

@article{Ng:2010:SJ,
  author          = {Ng, Hwee Tou},
  title           = {The State of the Journal},
  journal         = j-TALIP,
  volume          = {9},
  number          = {3},
  pages           = {8:1--8:??},
  month           = sep,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1838745.1838750},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sat Sep 18 15:58:58 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Harman:2010:ISI,
  author          = {Harman, Donna and Kando, Noriko and Majumder, Prasenjit
                     and Mitra, Mandar and Peters, Carol},
  title           = {Introduction to the {Special Issue on Indian Language
                     Information Retrieval Part I}},
  journal         = j-TALIP,
  volume          = {9},
  number          = {3},
  pages           = {9:1--9:??},
  month           = sep,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1838745.1838746},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sat Sep 18 15:58:58 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno       = {9},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Majumder:2010:FEE,
  author          = {Majumder, Prasenjit and Mitra, Mandar and Pal, Dipasree
                     and Bandyopadhyay, Ayan and Maiti, Samaresh and Pal,
                     Sukomal and Modak, Deboshree and Sanyal, Sucharita},
  title           = {The {FIRE 2008} Evaluation Exercise},
  journal         = j-TALIP,
  volume          = {9},
  number          = {3},
  pages           = {10:1--10:??},
  month           = sep,
  year            = {2010},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1838745.1838747},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  bibdate         = {Sat Sep 18 15:58:58 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {The aim of the Forum for Information Retrieval
                     Evaluation (FIRE) is to create an evaluation framework
                     in the spirit of TREC (Text REtrieval Conference), CLEF
                     (Cross-Language Evaluation Forum), and NTCIR (NII Test
                     Collection for IR Systems), for Indian language
                     Information Retrieval. The first evaluation exercise
                     conducted by FIRE was completed in 2008. This article
                     describes the test collections used at FIRE 2008,
                     summarizes the approaches adopted by various
                     participants, discusses the limitations of the
                     datasets, and outlines the tasks planned for the next
                     iteration of FIRE.},
  acknowledgement = ack-nhfb,
  articleno       = {10},
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords        = {evaluation; Indian languages; information retrieval},
}

@Article{Dolamic:2010:CSI,
  author = {Ljiljana Dolamic and Jacques Savoy},
  title = {Comparative Study of Indexing and Search Strategies
    for the {Hindi}, {Marathi}, and {Bengali} Languages},
  journal = j-TALIP,
  volume = {9},
  number = {3},
  pages = {11:1--11:??},
  month = sep,
  year = {2010},
  coden = {????},
  doi = {https://doi.org/10.1145/1838745.1838748},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Sat Sep 18 15:58:58 MDT 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {The main goal of this article is to describe and
    evaluate various indexing and search strategies for the
    Hindi, Bengali, and Marathi languages. These three
    languages are ranked among the world's 20 most spoken
    languages and they share similar syntax, morphology,
    and writing systems. In this article we examine these
    languages from an Information Retrieval (IR)
    perspective through describing the key elements of
    their inflectional and derivational morphologies, and
    suggest a light and more aggressive stemming approach
    based on them.\par

    In our evaluation of these stemming strategies we make
    use of the FIRE 2008 test collections, and then to
    broaden our comparisons we implement and evaluate two
    language independent indexing methods: the $n$-gram and
    trunc-$n$ (truncation of the first $n$ letters). We
    evaluate these solutions by applying our various IR
    models, including the Okapi, Divergence from Randomness
    (DFR) and statistical language models (LM) together
    with two classical vector-space approaches: {\em tf
    idf\/} and {\em Lnu-ltc}.\par

    Experiments performed with all three languages
    demonstrate that the I(n$_e$)C2 model derived from the
    Divergence from Randomness paradigm tends to provide
    the best mean average precision (MAP). Our own tests
    suggest that improved retrieval effectiveness would be
    obtained by applying more aggressive stemmers,
    especially those accounting for certain derivational
    suffixes, compared to those involving a light stemmer
    or ignoring this type of word normalization procedure.
    Comparisons between no stemming and stemming indexing
    schemes shows that performance differences are almost
    always statistically significant. When, for example, an
    aggressive stemmer is applied, the relative
    improvements obtained are $\approx$28\% for the Hindi
    language, $\approx$42\% for Marathi, and $\approx$18\%
    for Bengali, as compared to a no-stemming approach.
    Based on a comparison of word-based and
    language-independent approaches we find that the
    trunc-4 indexing scheme tends to result in performance
    levels statistically similar to those of an aggressive
    stemmer, yet better than the 4-gram indexing scheme. A
    query-by-query analysis reveals the reasons for this,
    and also demonstrates the advantage of applying a
    stemming or a trunc-4 indexing scheme.},
  acknowledgement = ack-nhfb,
  articleno = {11},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords = {Bengali language; Hindi language; Indic languages;
    Marathi language; natural language processing with
    Indo-European languages; search engines for Asian
    languages; stemmer},
}

@Article{Leveling:2010:SWI,
  author = {Johannes Leveling and Gareth J. F. Jones},
  title = {Sub-Word Indexing and Blind Relevance Feedback for
    {English}, {Bengali}, {Hindi}, and {Marathi} {IR}},
  journal = j-TALIP,
  volume = {9},
  number = {3},
  pages = {12:1--12:??},
  month = sep,
  year = {2010},
  coden = {????},
  doi = {https://doi.org/10.1145/1838745.1838749},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Sat Sep 18 15:58:58 MDT 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {The Forum for Information Retrieval Evaluation (FIRE)
    provides document collections, topics, and relevance
    assessments for information retrieval (IR) experiments
    on Indian languages. Several research questions are
    explored in this article: (1) How to create a simple,
    language-independent corpus-based stemmer, (2) How to
    identify sub-words and which types of sub-words are
    suitable as indexing units, and (3) How to apply blind
    relevance feedback on sub-words and how feedback term
    selection is affected by the type of the indexing unit.
    More than 140 IR experiments are conducted using the
    BM25 retrieval model on the topic titles and
    descriptions (TD) for the FIRE 2008 English, Bengali,
    Hindi, and Marathi document collections.\par

    The major findings are: The corpus-based stemming
    approach is effective as a knowledge-light term
    conflation step and useful in the case of few
    language-specific resources. For English, the
    corpus-based stemmer performs nearly as well as the
    Porter stemmer and significantly better than the
    baseline of indexing words when combined with query
    expansion. In combination with blind relevance
    feedback, it also performs significantly better than
    the baseline for Bengali and Marathi IR.\par

    Sub-words such as consonant-vowel sequences and word
    prefixes can yield similar or better performance in
    comparison to word indexing. There is no best
    performing method for all languages. For English,
    indexing using the Porter stemmer performs best, for
    Bengali and Marathi, overlapping 3-grams obtain the
    best result, and for Hindi, 4-prefixes yield the
    highest MAP. However, in combination with blind
    relevance feedback using 10 documents and 20 terms,
    6-prefixes for English and 4-prefixes for Bengali,
    Hindi, and Marathi IR yield the highest
    MAP.\par

    Sub-word identification is a general case of
    decompounding. It results in one or more index terms
    for a single word form and increases the number of
    index terms but decreases their average length. The
    corresponding retrieval experiments show that relevance
    feedback on sub-words benefits from selecting a larger
    number of index terms in comparison with retrieval on
    word forms. Similarly, selecting the number of
    relevance feedback terms depending on the ratio of word
    vocabulary size to sub-word vocabulary size almost
    always slightly increases information retrieval
    effectiveness compared to using a fixed number of terms
    for different languages.},
  acknowledgement = ack-nhfb,
  articleno = {12},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  keywords = {blind relevance feedback; evaluation; FIRE;
    Information retrieval; stemming; sub-word indexing},
}

@Article{Kumaran:2010:CMT,
  author = {A. Kumaran and Mitesh M. Khapra and Pushpak
    Bhattacharyya},
  title = {Compositional Machine Transliteration},
  journal = j-TALIP,
  volume = {9},
  number = {4},
  pages = {13:1--13:??},
  month = dec,
  year = {2010},
  coden = {????},
  doi = {https://doi.org/10.1145/1838751.1838752},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Dec 15 10:47:09 MST 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Machine transliteration is an important problem in an
    increasingly multilingual world, as it plays a critical
    role in many downstream applications, such as machine
    translation or crosslingual information retrieval
    systems. In this article, we propose compositional
    machine transliteration systems, where multiple
    transliteration components may be composed either to
    improve existing transliteration quality, or to enable
    transliteration functionality between languages even
    when no direct parallel names corpora exist between
    them. Specifically, we propose two distinct forms of
    composition: serial and parallel. Serial compositional
    system chains individual transliteration components,
    say, $X \rightarrow Y$ and $Y \rightarrow Z$ systems,
    to provide transliteration functionality, $X
    \rightarrow Z$.},
  acknowledgement = ack-nhfb,
  articleno = {13},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chinnakotla:2010:TRS,
  author = {Manoj K. Chinnakotla and Om P. Damani and Avijit
    Satoskar},
  title = {Transliteration for Resource-Scarce Languages},
  journal = j-TALIP,
  volume = {9},
  number = {4},
  pages = {14:1--14:??},
  month = dec,
  year = {2010},
  coden = {????},
  doi = {https://doi.org/10.1145/1838751.1838753},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Dec 15 10:47:09 MST 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Today, parallel corpus-based systems dominate the
    transliteration landscape. But the resource-scarce
    languages do not enjoy the luxury of large parallel
    transliteration corpus. For these languages, rule-based
    transliteration is the only viable option. In this
    article, we show that by properly harnessing the
    monolingual resources in conjunction with manually
    created rule base, one can achieve reasonable
    transliteration performance. We achieve this
    performance by exploiting the power of Character
    Sequence Modeling (CSM), which requires only
    monolingual resources. We present the results of our
    rule-based system for Hindi to English, English to
    Hindi, and Persian to English transliteration tasks. We
    also perform extrinsic evaluation of transliteration
    systems in the context of Cross Lingual Information
    Retrieval.},
  acknowledgement = ack-nhfb,
  articleno = {14},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Mukund:2010:IES,
  author = {Smruthi Mukund and Rohini Srihari and Erik Peterson},
  title = {An Information-Extraction System for {Urdu}---{A}
    Resource-Poor Language},
  journal = j-TALIP,
  volume = {9},
  number = {4},
  pages = {15:1--15:??},
  month = dec,
  year = {2010},
  coden = {????},
  doi = {https://doi.org/10.1145/1838751.1838754},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Dec 15 10:47:09 MST 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {There has been an increase in the amount of
    multilingual text on the Internet due to the
    proliferation of news sources and blogs. The Urdu
    language, in particular, has experienced explosive
    growth on the Web. Text mining for information
    discovery, which includes tasks such as identifying
    topics, relationships and events, and sentiment
    analysis, requires sophisticated natural language
    processing (NLP). NLP systems begin with modules such
    as word segmentation, part-of-speech tagging, and
    morphological analysis and progress to modules such as
    shallow parsing and named entity tagging. While there
    have been considerable advances in developing such
    comprehensive NLP systems for English, the work for
    Urdu is still in its infancy.},
  acknowledgement = ack-nhfb,
  articleno = {15},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Katz:2011:ISI,
  author = {Graham Katz and Mona Diab},
  title = {Introduction to the Special Issue on {Arabic}
    Computational Linguistics},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {1:1--1:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929909},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno = {1},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Condon:2011:MTE,
  author = {S. Condon and D. Parvaz and J. Aberdeen and C. Doran
    and A. Freeman and M. Awad},
  title = {Machine Translation Errors: {English} and {Iraqi
    Arabic}},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {2:1--2:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929910},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Errors in machine translations of English-Iraqi Arabic
    dialogues were analyzed using the methods developed for
    the Human Translation Error Rate measure (HTER). Human
    annotations were used to refine the Translation Error
    Rate (TER) annotations. The analyses were performed on
    approximately 100 translations into each language from
    four translation systems. Results include high
    frequencies of pronoun errors and errors involving the
    copula in translations to English. High frequencies of
    errors in subject/person inflection and closed-word
    classes characterized translations to Iraqi Arabic.
    There were similar frequencies of word order errors in
    both translation directions and low frequencies of
    polarity errors. The problems associated with many
    errors can be predicted from structural differences
    between the two languages.},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Rytting:2011:SCD,
  author = {C. Anton Rytting and David M. Zajic and Paul Rodrigues
    and Sarah C. Wayland and Christian Hettick and Tim
    Buckwalter and Charles C. Blake},
  title = {Spelling Correction for Dialectal {Arabic} Dictionary
    Lookup},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {3:1--3:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929911},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {The ``Did You Mean\ldots{}?'' system, described in this
    article, is a spelling corrector for Arabic that is
    designed specifically for L2 learners of dialectal
    Arabic in the context of dictionary lookup. The authors
    use an orthographic density metric to motivate the need
    for a finer-grained ranking method for candidate words
    than unweighted Levenshtein edit distance. The Did You
    Mean\ldots{}? architecture is described, and the authors
    show that mean reciprocal rank can be improved by
    tuning operation weights according to sound confusions,
    and by anticipating likely spelling variants.},
  acknowledgement = ack-nhfb,
  articleno = {3},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Kulick:2011:ESC,
  author = {Seth Kulick},
  title = {Exploiting Separation of Closed-Class Categories for
    {Arabic} Tokenization and Part-of-Speech Tagging},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {4:1--4:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929912},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Research on the problem of morphological
    disambiguation of Arabic has noted that techniques
    developed for lexical disambiguation in English do not
    easily transfer over, since the affixation present in
    Arabic creates a very different tag set than for
    English, encoding both inflectional morphology and more
    complex tokenization sequences. This work takes a new
    approach to this problem based on a distinction between
    the open-class and closed-class categories of tokens,
    which differ both in their frequencies and in their
    possible morphological affixations. This separation
    simplifies the morphological analysis problem
    considerably, making it possible to use a Conditional
    Random Field model for joint tokenization and ``core''
    part-of-speech tagging of the open-class items, while
    the closed-class items are handled by regular
    expressions.},
  acknowledgement = ack-nhfb,
  articleno = {4},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Abdul-Mageed:2011:ADA,
  author = {Muhammad Abdul-Mageed},
  title = {Automatic Detection of {Arabic} Non-Anaphoric Pronouns
    for Improving Anaphora Resolution},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {5:1--5:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929913},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Anaphora resolution is one of the most difficult tasks
    in NLP. The ability to identify non-referential
    pronouns before attempting an anaphora resolution task
    would be significant, since the system would not have
    to attempt resolving such pronouns and hence end up
    with fewer errors. In addition, the number of
    non-referential pronouns has been found to be
    non-trivial in many domains. The task of detecting
    non-referential pronouns could also be incorporated
    into a part-of-speech tagger or a parser, or treated as
    an initial step in semantic interpretation. In this
    article, I describe a machine learning method for
    identifying non-referential pronouns in an annotated
    subsegment of the Penn Arabic Treebank using three
    different feature settings.},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Wu:2011:IPD,
  author = {Chung-Hsien Wu and Wei-Bin Liang and Jui-Feng Yeh},
  title = {Interruption Point Detection of Spontaneous Speech
    Using Inter-Syllable Boundary-Based Prosodic Features},
  journal = j-TALIP,
  volume = {10},
  number = {1},
  pages = {6:1--6:??},
  month = mar,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1929908.1929914},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Wed Mar 16 18:07:50 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {This article presents a probabilistic scheme for
    detecting the interruption point (IP) in spontaneous
    speech based on inter-syllable boundary-based prosodic
    features. Because of the high error rate in spontaneous
    speech recognition, a combined acoustic model
    considering both syllable and subsyllable recognition
    units, is firstly used to determine the inter-syllable
    boundaries and output the recognition confidence of the
    input speech. Based on the finding that IPs always
    occur at inter-syllable boundaries, a probability
    distribution of the prosodic features at the current
    potential IP is estimated. The Conditional Random Field
    (CRF) model, which employs the clustered prosodic
    features of the current potential IP and its preceding
    and succeeding inter-syllable boundaries, is employed
    to output the IP likelihood measure.},
  acknowledgement = ack-nhfb,
  articleno = {6},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Wu:2011:ADS,
  author = {Chung-Hsien Wu and Hung-Yu Su and Han-Ping Shen},
  title = {Articulation-Disordered Speech Recognition Using
    Speaker-Adaptive Acoustic Models and Personalized
    Articulation Patterns},
  journal = j-TALIP,
  volume = {10},
  number = {2},
  pages = {7:1--7:??},
  month = jun,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1967293.1967294},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Tue Jun 28 18:29:03 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {This article presents a novel approach to
    speaker-adaptive recognition of speech from
    articulation-disordered speakers without a large amount
    of adaptation data. An unsupervised, incremental
    adaptation method is adopted for personalized model
    adaptation based on the recognized syllables with high
    recognition confidence from an automatic speech
    recognition (ASR) system. For articulation pattern
    discovery, the manually transcribed syllables and the
    corresponding recognized syllables are associated with
    each other using articulatory features. The Apriori
    algorithm is applied to discover the articulation
    patterns in the corpus, which are then used to
    construct a personalized pronunciation dictionary to
    improve the recognition accuracy of the ASR. The
    experimental results indicate that the proposed
    adaptation method achieves a syllable error rate
    reduction of 6.1\%, outperforming the conventional
    adaptation methods that have a syllable error rate
    reduction of 3.8\%.},
  acknowledgement = ack-nhfb,
  articleno = {7},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Paik:2011:FCB,
  author = {Jiaul H. Paik and Swapan K. Parui},
  title = {A Fast Corpus-Based Stemmer},
  journal = j-TALIP,
  volume = {10},
  number = {2},
  pages = {8:1--8:??},
  month = jun,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1967293.1967295},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Tue Jun 28 18:29:03 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Stemming is a mechanism of word form normalization
    that transforms the variant word forms to their common
    root. In an Information Retrieval system, it is used to
    increase the system's performance, specifically the
    recall and desirably the precision. Although its
    usefulness is shown to be mixed in languages such as
    English, because morphologically complex languages
    stemming produces a significant performance
    improvement. A number of linguistic rule-based stemmers
    are available for most European languages which employ
    a set of rules to get back the root word from its
    variants. But for Indian languages which are highly
    inflectional in nature, devising a linguistic
    rule-based stemmer needs some additional resources
    which are not available.},
  acknowledgement = ack-nhfb,
  articleno = {8},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

%%% The subtitle's leading article is brace-protected ({A}), as in the
%%% other title-cased subtitles of this file, so no style can lowercase it.
@Article{Ekbal:2011:WVB,
  author =       "Asif Ekbal and Sriparna Saha",
  title =        "Weighted Vote-Based Classifier Ensemble for Named
                 Entity Recognition: {A} Genetic Algorithm-Based
                 Approach",
  journal =      j-TALIP,
  volume =       "10",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1967293.1967296",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 28 18:29:03 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "In this article, we report the search capability of
                 Genetic Algorithm (GA) to construct a weighted
                 vote-based classifier ensemble for Named Entity
                 Recognition (NER). Our underlying assumption is that
                 the reliability of predictions of each classifier
                 differs among the various named entity (NE) classes.
                 Thus, it is necessary to quantify the amount of voting
                 of a particular classifier for a particular output
                 class. Here, an attempt is made to determine the
                 appropriate weights of voting for each class in each
                 classifier using GA. The proposed technique is
                 evaluated for four leading Indian languages, namely
                 Bengali, Hindi, Telugu, and Oriya, which are all
                 resource-poor in nature.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Liu:2011:VPS,
  author = {C.-L. Liu and M.-H. Lai and K.-W. Tien and Y.-H.
    Chuang and S.-H. Wu and C.-Y. Lee},
  title = {Visually and Phonologically Similar Characters in
    Incorrect {Chinese} Words: Analyses, Identification,
    and Applications},
  journal = j-TALIP,
  volume = {10},
  number = {2},
  pages = {10:1--10:??},
  month = jun,
  year = {2011},
  coden = {????},
  doi = {https://doi.org/10.1145/1967293.1967297},
  issn = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l = {1530-0226},
  bibdate = {Tue Jun 28 18:29:03 MDT 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract = {Information about students' mistakes opens a window to
    an understanding of their learning processes, and helps
    us design effective course work to help students avoid
    replication of the same errors. Learning from mistakes
    is important not just in human learning activities; it
    is also a crucial ingredient in techniques for the
    developments of student models. In this article, we
    report findings of our study on 4,100 erroneous Chinese
    words. Seventy-six percent of these errors were related
    to the phonological similarity between the correct and
    the incorrect characters, 46\% were due to visual
    similarity, and 29\% involved both factors. We propose
    a computing algorithm that aims at replication of
    incorrect Chinese words.},
  acknowledgement = ack-nhfb,
  articleno = {10},
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@article{Chen:2011:ISI,
  author          = {Keh-Jiann Chen and Qun Liu and Nianwen Xue and Le Sun},
  title           = {Introduction to the Special Issue on {Chinese} Language Processing},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {11:1--11:??},
  articleno       = {11},
  doi             = {https://doi.org/10.1145/2002980.2002981},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhu:2011:ATC,
  author          = {Muhua Zhu and Jingbo Zhu and Tong Xiao},
  title           =
    {Automatic Treebank Conversion via Informed Decoding --- {A} Case Study
     on {Chinese} Treebanks},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {12:1--12:??},
  articleno       = {12},
  doi             = {https://doi.org/10.1145/2002980.2002982},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Treebanks are valuable resources for syntactic parsing. For some
     languages such as Chinese, we can obtain multiple constituency
     treebanks which are developed by different organizations. However, due
     to discrepancies of underlying annotation standards, such treebanks in
     general cannot be used together through direct data combination. To
     enlarge training data for syntactic parsing, we focus in this article
     on the challenge of unifying standards of disparate treebanks by
     automatically converting one treebank (source treebank) to fit a
     different standard which is exhibited by another treebank (target
     treebank). We propose to convert a treebank in two sequential steps
     which correspond to the part-of-speech level and syntactic structure
     level (including tree structures and grammar labels), respectively.},
}

@article{Li:2011:USR,
  author          = {Junhui Li and Guodong Zhou},
  title           =
    {Unified Semantic Role Labeling for Verbal and Nominal Predicates in the
     {Chinese} Language},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {13:1--13:??},
  articleno       = {13},
  doi             = {https://doi.org/10.1145/2002980.2002983},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {This article explores unified semantic role labeling (SRL) for both
     verbal and nominal predicates in the Chinese language. This is done by
     considering SRL for both verbal and nominal predicates in a unified
     framework. First, we systematically examine various kinds of features
     for verbal SRL and nominal SRL, respectively, besides those widely used
     ones. Then we further improve the performance of nominal SRL with
     various kinds of verbal evidence, that is, merging the training
     instances from verbal predicates and integrating various kinds of
     features derived from SRL for verbal predicates. Finally, we address
     the issue of automatic predicate recognition, which is essential for
     nominal SRL.},
}

@article{Zhang:2011:DPS,
  author          = {Peng Zhang and Wenjie Li and Yuexian Hou and Dawei Song},
  title           =
    {Developing Position Structure-Based Framework for {Chinese} Entity
     Relation Extraction},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {14:1--14:??},
  articleno       = {14},
  doi             = {https://doi.org/10.1145/2002980.2002984},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Relation extraction is the task of finding semantic relations between
     two entities in text, and is often cast as a classification problem. In
     contrast to the significant achievements on English language, research
     progress in Chinese relation extraction is relatively limited. In this
     article, we present a novel Chinese relation extraction framework,
     which is mainly based on a 9-position structure. The design of this
     proposed structure is motivated by the fact that there are some obvious
     connections between relation types/subtypes and position structures of
     two entities. The 9-position structure can be captured with less effort
     than applying deep natural language processing, and is effective to
     relieve the class imbalance problem which often hurts the
     classification performance.},
}

@article{Qian:2011:ECD,
  author          = {Longhua Qian and Guodong Zhou and Qiaoming Zhu},
  title           =
    {Employing Constituent Dependency Information for Tree Kernel-Based
     Semantic Relation Extraction between Named Entities},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {15:1--15:??},
  articleno       = {15},
  doi             = {https://doi.org/10.1145/2002980.2002985},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {This article proposes a new approach to dynamically determine the tree
     span for tree kernel-based semantic relation extraction between named
     entities. The basic idea is to employ constituent dependency
     information in keeping the necessary nodes and their head children
     along the path connecting the two entities in the syntactic parse tree,
     while removing the noisy information from the tree, eventually leading
     to a dynamic syntactic parse tree. This article also explores various
     entity features and their possible combinations via a unified syntactic
     and semantic tree framework, which integrates both structural syntactic
     parse information and entity-related semantic information. Evaluation
     on the ACE RDC 2004 English and 2005 Chinese benchmark corpora shows
     that our dynamic syntactic parse tree much outperforms all previous
     tree spans, indicating its effectiveness in well representing the
     structural nature of relation instances while removing redundant
     information.},
}

@article{Huang:2011:UST,
  author          = {Chung-Chi Huang and Ho-Ching Yen and Ping-Che Yang and Shih-Ting Huang and Jason S. Chang},
  title           =
    {Using Sublexical Translations to Handle the {OOV} Problem in Machine
     Translation},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = sep,
  volume          = {10},
  number          = {3},
  pages           = {16:1--16:??},
  articleno       = {16},
  doi             = {https://doi.org/10.1145/2002980.2002986},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Fri Sep 9 15:01:12 MDT 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {We introduce a method for learning to translate out-of-vocabulary (OOV)
     words. The method focuses on combining sublexical/constituent
     translations of an OOV to generate its translation candidates. In our
     approach, wildcard searches are formulated based on our OOV analysis,
     aimed at maximizing the probability of retrieving OOVs' sublexical
     translations from existing resources of Machine Translation (MT)
     systems. At run-time, translation candidates of the unknown words are
     generated from their suitable sublexical translations and ranked based
     on monolingual and bilingual information.},
}

@Article{Du:2011:ICE,
  author =       "Jinhua Du and Andy Way",
  title =        "Improved {Chinese--English} {SMT} with {Chinese}
                 {``DE''} Construction Classification and Reordering",
  journal =      j-TALIP,
  volume =       "10",
  number =       "4",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2025384.2025385",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 15 09:23:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Syntactic reordering on the source side has been
                 demonstrated to be helpful and effective for handling
                 different word orders between source and target
                 languages in SMT. In this article, we focus on the
                 Chinese (DE) construction which is flexible and
                 ubiquitous in Chinese and has many different ways to be
                 translated into English so that it is a major source of
                 word order differences in terms of translation quality.
                 This article carries out the Chinese ``DE''
                 construction study for Chinese--English SMT in which we
                 propose a new classifier model---discriminative latent
                 variable model (DPLVM)---with new features to improve
                 the classification accuracy and indirectly improve the
                 translation quality compared to a log-linear
                 classifier.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Xiao:2011:LMS,
  author =       "Tong Xiao and Jingbo Zhu and Muhua Zhu",
  title =        "Language Modeling for {Syntax-Based} Machine
                 Translation Using Tree Substitution Grammars: a Case
                 Study on {Chinese--English} Translation",
  journal =      j-TALIP,
  volume =       "10",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2025384.2025386",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 15 09:23:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The poor grammatical output of Machine Translation
                 (MT) systems appeals syntax-based approaches within
                 language modeling. However, previous studies showed
                 that syntax-based language modeling using
                 (Context-Free) Treebank Grammars was not very helpful
                 in improving BLEU scores for Chinese-English machine
                 translation. In this article we further study this
                 issue in the context of Chinese-English syntax-based
                 Statistical Machine Translation (SMT) where Synchronous
                 Tree Substitution Grammars (STSGs) are utilized to
                 model the translation process. In particular, we
                 develop a Tree Substitution Grammar-based language
                 model for syntax-based MT, and present three methods to
                 efficiently integrate the proposed language model into
                 MT decoding. In addition, we design a simple and
                 effective method to adapt syntax-based language models
                 for MT tasks.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@article{Li:2011:MEC,
  author          = {Lishuang Li and Peng Wang and Degen Huang and Lian Zhao},
  title           = {Mining {English--Chinese} Named Entity Pairs from Comparable Corpora},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = dec,
  volume          = {10},
  number          = {4},
  pages           = {19:1--19:??},
  articleno       = {19},
  doi             = {https://doi.org/10.1145/2025384.2025387},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Dec 15 09:23:26 MST 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Bilingual Named Entity (NE) pairs are valuable resources for many NLP
     applications. Since comparable corpora are more accessible, abundant
     and up-to-date, recent researches have concentrated on mining bilingual
     lexicons using comparable corpora. Leveraging comparable corpora, this
     research presents a novel approach to mining English-Chinese NE
     translations by combining multi-dimension features from various
     information sources for every possible NE pair, which include the
     transliteration model, English-Chinese matching, Chinese-English
     matching, translation model, length, and context vector. These features
     are integrated into one model with linear combination and minimum
     sample risk (MSR) algorithm. As for the high type-dependence of NE
     translation, we integrate different features according to different NE
     types.},
}

@article{Liu:2011:UBR,
  author          = {Zhiyuan Liu and Yabin Zheng and Lixing Xie and Maosong Sun and Liyun Ru and Yang Zhang},
  title           =
    {User Behaviors in Related Word Retrieval and New Word Detection: a
     Collaborative Perspective},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = dec,
  volume          = {10},
  number          = {4},
  pages           = {20:1--20:??},
  articleno       = {20},
  doi             = {https://doi.org/10.1145/2025384.2025388},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Dec 15 09:23:26 MST 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Nowadays, user behavior analysis and collaborative filtering have drawn
     a large body of research in the machine learning community. The goal is
     either to enhance the user experience or discover useful information
     hidden in the data. In this article, we conduct extensive experiments
     on a Chinese input method data set, which keeps the word lists that
     users have used. Then, from the collaborative perspective, we aim to
     solve two tasks in natural language processing, that is, related word
     retrieval and new word detection. Motivated by the observation that two
     words are usually highly related to each other if they co-occur
     frequently in users' records, we propose a novel semantic relatedness
     measure between words that takes both user behaviors and collaborative
     filtering into consideration.},
}

@article{Wang:2011:DLA,
  author          = {Baoxun Wang and Bingquan Liu and Xiaolong Wang and Chengjie Sun and Deyuan Zhang},
  title           =
    {Deep Learning Approaches to Semantic Relevance Modeling for {Chinese}
     Question--Answer Pairs},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2011},
  month           = dec,
  volume          = {10},
  number          = {4},
  pages           = {21:1--21:??},
  articleno       = {21},
  doi             = {https://doi.org/10.1145/2025384.2025389},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Dec 15 09:23:26 MST 2011},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {The human-generated question-answer pairs in the Web social communities
     are of great value for the research of automatic question-answering
     technique. Due to the large amount of noise information involved in
     such corpora, it is still a problem to detect the answers even though
     the questions are exactly located. Quantifying the semantic relevance
     between questions and their candidate answers is essential to answer
     detection in social media corpora. Since both the questions and their
     answers usually contain a small number of sentences, the relevance
     modeling methods have to overcome the problem of word feature sparsity.
     In this article, the deep learning principle is introduced to address
     the semantic relevance modeling task.},
}

@article{Pal:2012:HRI,
  author          = {Umapada Pal and Ramachandran Jayadevan and Nabin Sharma},
  title           =
    {Handwriting Recognition in {Indian} Regional Scripts: a Survey of
     Offline Techniques},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2012},
  month           = mar,
  volume          = {11},
  number          = {1},
  pages           = {1:1--1:??},
  articleno       = {1},
  doi             = {https://doi.org/10.1145/2090176.2090177},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Mar 1 16:54:10 MST 2012},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Offline handwriting recognition in Indian regional scripts is an
     interesting area of research as almost 460 million people in India use
     regional scripts. The nine major Indian regional scripts are Bangla
     (for Bengali and Assamese languages), Gujarati, Kannada, Malayalam,
     Oriya, Gurumukhi (for Punjabi language), Tamil, Telugu, and Nastaliq
     (for Urdu language). A state-of-the-art survey about the techniques
     available in the area of offline handwriting recognition (OHR) in
     Indian regional scripts will be of a great aid to the researchers in
     the subcontinent and hence a sincere attempt is made in this article to
     discuss the advancements reported in this regard during the last few
     decades.},
}

@article{Zaghouani:2012:RRB,
  author          = {Wajdi Zaghouani},
  title           = {{RENAR}: a Rule-Based {Arabic} Named Entity Recognition System},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2012},
  month           = mar,
  volume          = {11},
  number          = {1},
  pages           = {2:1--2:??},
  articleno       = {2},
  doi             = {https://doi.org/10.1145/2090176.2090178},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Mar 1 16:54:10 MST 2012},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {Named entity recognition has served many natural language processing
     tasks such as information retrieval, machine translation, and question
     answering systems. Many researchers have addressed the name
     identification issue in a variety of languages and recently some
     research efforts have started to focus on named entity recognition for
     the Arabic language. We present a working Arabic information extraction
     (IE) system that is used to analyze large volumes of news texts every
     day to extract the named entity (NE) types person, organization,
     location, date, and number, as well as quotations (direct reported
     speech) by and about people. The named entity recognition (NER) system
     was not developed for Arabic, but instead a multilingual NER system was
     adapted to also cover Arabic.},
}

@article{Chang:2012:EDC,
  author          = {Ru-Yng Chang and Chung-Hsien Wu and Philips Kokoh Prasetyo},
  title           =
    {Error Diagnosis of {Chinese} Sentences Using Inductive Learning
     Algorithm and Decomposition-Based Testing Mechanism},
  journal         = j-TALIP,
  fjournal        = {ACM Transactions on Asian Language Information Processing},
  year            = {2012},
  month           = mar,
  volume          = {11},
  number          = {1},
  pages           = {3:1--3:??},
  articleno       = {3},
  doi             = {https://doi.org/10.1145/2090176.2090179},
  issn            = {1530-0226 (print), 1558-3430 (electronic)},
  issn-l          = {1530-0226},
  coden           = {????},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate         = {Thu Mar 1 16:54:10 MST 2012},
  bibsource       = {http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract        =
    {This study presents a novel approach to error diagnosis of Chinese
     sentences for Chinese as second language (CSL) learners. A penalized
     probabilistic First-Order Inductive Learning (pFOIL) algorithm is
     presented for error diagnosis of Chinese sentences. The pFOIL algorithm
     integrates inductive logic programming (ILP), First-Order Inductive
     Learning (FOIL), and a penalized log-likelihood function for error
     diagnosis. This algorithm considers the uncertain, imperfect, and
     conflicting characteristics of Chinese sentences to infer error types
     and produce human-interpretable rules for further error correction. In
     a pFOIL algorithm, relation pattern background knowledge and quantized
     t-score background knowledge are proposed to characterize a sentence
     and then used for likelihood estimation.},
}

@Article{He:2012:ISP,
  author =       "Yulan He",
  title =        "Incorporating Sentiment Prior Knowledge for Weakly
                 Supervised Sentiment Analysis",
  journal =      j-TALIP,
  volume =       "11",
  number =       "2",
  pages =        "4:1--4:??",
  month =        jun,
  year =         "2012",
  DOI =          "https://doi.org/10.1145/2184436.2184437",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 12 11:20:16 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article presents two novel approaches for
                 incorporating sentiment prior knowledge into the topic
                 model for weakly supervised sentiment analysis where
                 sentiment labels are considered as topics. One is by
                 modifying the Dirichlet prior for topic-word
                 distribution (LDA-DP), the other is by augmenting the
                 model objective function through adding terms that
                 express preferences on expectations of sentiment labels
                 of the lexicon words using generalized expectation
                 criteria (LDA-GE). We conducted extensive experiments
                 on English movie review data and multi-domain sentiment
                 dataset as well as Chinese product reviews about mobile
                 phones, digital cameras, MP3 players, and monitors. The
                 results show that while both LDA-DP and LDA-GE perform
                 comparably to existing weakly supervised sentiment
                 classification algorithms, they are much simpler and
                 computationally efficient, rendering them more suitable
                 for online and real-time sentiment classification on
                 the Web. We observed that LDA-GE is more effective than
                 LDA-DP, suggesting that it should be preferred when
                 considering employing the topic model for sentiment
                 analysis. Moreover, both models are able to extract
                 highly domain-salient polarity words from text.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Wang:2012:TUF,
  author =       "Hongling Wang and Guodong Zhou",
  title =        "Toward a Unified Framework for Standard and Update
                 Multi-Document Summarization",
  journal =      j-TALIP,
  volume =       "11",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2012",
  DOI =          "https://doi.org/10.1145/2184436.2184438",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 12 11:20:16 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article presents a unified framework for
                 extracting standard and update summaries from a set of
                 documents. In particular, a topic modeling approach is
                 employed for salience determination and a dynamic
                 modeling approach is proposed for redundancy control.
                 In the topic modeling approach for salience
                 determination, we represent various kinds of text
                 units, such as word, sentence, document, documents, and
                 summary, using a single vector space model via their
                 corresponding probability distributions over the
                 inherent topics of given documents or a related corpus.
                 Therefore, we are able to calculate the similarity
                 between any two text units via their topic probability
                 distributions. In the dynamic modeling approach for
                 redundancy control, we consider the similarity between
                 the summary and the given documents, and the similarity
                 between the sentence and the summary, besides the
                 similarity between the sentence and the given
                 documents, for standard summarization while for update
                 summarization, we also consider the similarity between
                 the sentence and the history documents or summary.
                 Evaluation on TAC 2008 and 2009 in English language
                 shows encouraging results, especially the dynamic
                 modeling approach in removing the redundancy in the
                 given documents. Finally, we extend the framework to
                 Chinese multi-document summarization and experiments
                 show the effectiveness of our framework.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Andrade:2012:SEC,
  author =       "Daniel Andrade and Takuya Matsuzaki and Jun'ichi
                 Tsujii",
  title =        "Statistical Extraction and Comparison of Pivot Words
                 for Bilingual Lexicon Extension",
  journal =      j-TALIP,
  volume =       "11",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2012",
  DOI =          "https://doi.org/10.1145/2184436.2184439",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 12 11:20:16 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Bilingual dictionaries can be automatically extended
                 by new translations using comparable corpora. The
                 general idea is based on the assumption that similar
                 words have similar contexts across languages. However,
                 previous studies have mainly focused on Indo-European
                 languages, or use only a bag-of-words model to describe
                 the context. Furthermore, we argue that it is helpful
                 to extract only the statistically significant context,
                 instead of using all context. The present approach
                 addresses these issues in the following manner. First,
                 based on the context of a word with an unknown
                 translation (query word), we extract salient pivot
                 words. Pivot words are words for which a translation is
                 already available in a bilingual dictionary. For the
                 extraction of salient pivot words, we use a Bayesian
                 estimation of the point-wise mutual information to
                 measure statistical significance. In the second step,
                 we match these pivot words across languages to identify
                 translation candidates for the query word. We therefore
                 calculate a similarity score between the query word and
                 a translation candidate using the probability that the
                 same pivots will be extracted for both the query word
                 and the translation candidate. The proposed method uses
                 several context positions, namely, a bag-of-words of
                 one sentence, and the successors, predecessors, and
                 siblings with respect to the dependency parse tree of
                 the sentence. In order to make these context positions
                 comparable across Japanese and English, which are
                 unrelated languages, we use several heuristics to
                 adjust the dependency trees appropriately. We
                 demonstrate that the proposed method significantly
                 increases the accuracy of word translations, as
                 compared to previous methods.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing (TALIP)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Wang:2012:IGD,
  author =       "Kun Wang and Chengqing Zong and Keh-Yih Su",
  title =        "Integrating Generative and Discriminative
                 Character-Based Models for {Chinese} Word
                 Segmentation",
  journal =      j-TALIP,
  volume =       "11",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2012",
  DOI =          "https://doi.org/10.1145/2184436.2184440",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Jun 12 11:20:16 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Among statistical approaches to Chinese word
                 segmentation, the word-based n-gram (generative)
                 model and the character-based tagging (discriminative)
                 model are two dominant approaches in the literature.
                 The former gives excellent performance for the
                 in-vocabulary (IV) words; however, it handles
                 out-of-vocabulary (OOV) words poorly. On the other
                 hand, though the latter is more robust for OOV words,
                 it fails to deliver satisfactory performance for IV
                 words. These two approaches behave differently due to
                 the unit they use (word vs. character) and the model
                 form they adopt (generative vs. discriminative). In
                 general, character-based approaches are more robust
                 than word-based ones, as the vocabulary of characters
                 is a closed set; and discriminative models are more
                 robust than generative ones, since they can flexibly
                 include all kinds of available information, such as
                 future context. This article first proposes a
                 character-based n-gram model to enhance the robustness
                 of the generative approach. Then the proposed
                 generative model is further integrated with the
                 character-based discriminative model to take advantage
                 of both approaches. Our experiments show that this
                 integrated approach outperforms all the existing
                 approaches reported in the literature. Afterwards, a
                 complete and detailed error analysis is conducted.
                 Since a significant portion of the critical errors is
                 related to numerical/foreign strings, character-type
                 information is then incorporated into the model to
                 further improve its performance. Last, the proposed
                 integrated approach is tested on cross-domain corpora,
                 and a semi-supervised domain adaptation algorithm is
                 proposed and shown to be effective in our
                 experiments.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing (TALIP)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Isozaki:2012:HBP,
  author =       "Hideki Isozaki and Katsuhito Sudoh and Hajime Tsukada
                 and Kevin Duh",
  title =        "{HPSG}-Based Preprocessing for {English-to-Japanese}
                 Translation",
  journal =      j-TALIP,
  volume =       "11",
  number =       "3",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2334801.2334802",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Sep 11 14:17:04 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Japanese sentences have completely different word
                 orders from corresponding English sentences. Typical
                 phrase-based statistical machine translation (SMT)
                 systems such as Moses search for the best word
                 permutation within a given distance limit (distortion
                 limit). For English-to-Japanese translation, we need a
                 large distance limit to obtain acceptable translations,
                 and the number of translation candidates is extremely
                 large. Therefore, SMT systems often fail to find
                 acceptable translations within a limited time. To solve
                 this problem, some researchers use rule-based
                 preprocessing approaches, which reorder English words
                 just like Japanese by using dozens of rules. Our idea
                 is based on the following two observations: (1)
                 Japanese is a typical head-final language, and (2) we
                 can detect heads of English sentences by a head-driven
                 phrase structure grammar (HPSG) parser. The main
                 contributions of this article are twofold: First, we
                 demonstrate how off-the-shelf, state-of-the-art HPSG
                 parser enables us to write the reordering rules in an
                 abstract level and can easily improve the quality of
                 English-to-Japanese translation. Second, we also show
                 that syntactic heads achieve better results than
                 semantic heads. The proposed method outperforms the
                 best system of NTCIR-7 PATMT EJ task.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Zhang:2012:ABH,
  author =       "Lidan Zhang and Kwok-Ping Chan",
  title =        "Adaptive {Bayesian HMM} for Fully Unsupervised
                 {Chinese} Part-of-Speech Induction",
  journal =      j-TALIP,
  volume =       "11",
  number =       "3",
  pages =        "9:1--9:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2334801.2334803",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Sep 11 14:17:04 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "We propose an adaptive Bayesian hidden Markov model
                 for fully unsupervised part-of-speech (POS) induction.
                 The proposed model with its inference algorithm has two
                 extensions to the first-order Bayesian HMM with
                 Dirichlet priors. First our algorithm infers the
                 optimal number of hidden states from the training
                 corpus rather than fixes the dimensionality of state
                 space beforehand. The second extension studies the
                 Chinese unknown word processing module which measures
                 similarities from both morphological properties and
                 context distribution. Experimental results showed that
                 both of these two extensions can help to find the
                 optimal categories for Chinese in terms of both
                 unsupervised clustering metrics and grammar induction
                 accuracies on the Chinese Treebank.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Lee:2012:SMB,
  author =       "Jinsik Lee and Sungjin Lee and Jonghoon Lee and
                 Byeongchang Kim and Gary Geunbae Lee",
  title =        "Stacking Model-Based {Korean} Prosodic Phrasing Using
                 Speaker Variability Reduction and Linguistic Feature
                 Engineering",
  journal =      j-TALIP,
  volume =       "11",
  number =       "3",
  pages =        "10:1--10:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2334801.2334804",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Sep 11 14:17:04 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article presents a prosodic phrasing model for a
                 general purpose Korean speech synthesis system. To
                 reflect the factors affecting prosodic phrasing in the
                 model, linguistically motivated machine-learning
                 features were investigated. These features were
                 effectively incorporated using a stacking model. The
                 phrasing performance was also improved through feature
                 engineering. The corpus used in the experiment is a
                 4,392-sentence corpus (55,015 words with an average of
                 13 words per sentence). Because the corpus contains
                 speaker-dependent variability and such variability is
                 not appropriately reflected in a general purpose speech
                 synthesis system, a method to reduce such variability
                 is proposed. In addition, the entire set of data used
                 in the experiment is provided to the public for future
                 use in comparative research.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Duc:2012:CLL,
  author =       "Nguyen Tuan Duc and Danushka Bollegala and Mitsuru
                 Ishizuka",
  title =        "Cross-Language Latent Relational Search between
                 {Japanese} and {English} Languages Using a {Web}
                 Corpus",
  journal =      j-TALIP,
  volume =       "11",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2334801.2334805",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Tue Sep 11 14:17:04 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Latent relational search is a novel entity retrieval
                 paradigm based on the proportional analogy between two
                 entity pairs. Given a latent relational search query
                 \{(Japan, Tokyo), (France, ?)\}, a latent relational
                 search engine is expected to retrieve and rank the
                 entity ``Paris'' as the first answer in the result
                 list. A latent relational search engine extracts
                 entities and relations between those entities from a
                 corpus, such as the Web. Moreover, from some supporting
                 sentences in the corpus, (e.g., ``Tokyo is the capital
                 of Japan'' and ``Paris is the capital and biggest city
                 of France''), the search engine must recognize the
                 relational similarity between the two entity pairs. In
                 cross-language latent relational search, the entity
                 pairs as well as the supporting sentences of the first
                 entity pair and of the second entity pair are in
                 different languages. Therefore, the search engine must
                 recognize similar semantic relations across languages.
                 In this article, we study the problem of cross-language
                 latent relational search between Japanese and English
                 using Web data. To perform cross-language latent
                 relational search in high speed, we propose a
                 multi-lingual indexing method for storing entities and
                 lexical patterns that represent the semantic relations
                 extracted from Web corpora. We then propose a hybrid
                 lexical pattern clustering algorithm to capture the
                 semantic similarity between lexical patterns across
                 languages. Using this algorithm, we can precisely
                 measure the relational similarity between entity pairs
                 across languages, thereby achieving high precision in
                 the task of cross-language latent relational search.
                 Experiments show that the proposed method achieves an
                 MRR of 0.605 on Japanese-English cross-language latent
                 relational search query sets and it also achieves a
                 reasonable performance on the INEX Entity Ranking
                 task.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Mitamura:2012:ISI,
  author =       "Teruko Mitamura and Noriko Kando and Koichi Takeda",
  title =        "Introduction to the Special Issue on {RITE}",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382594",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Miyao:2012:ETE,
  author =       "Yusuke Miyao and Hideki Shima and Hiroshi Kanayama and
                 Teruko Mitamura",
  title =        "Evaluating Textual Entailment Recognition for
                 University Entrance Examinations",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382595",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The present article addresses an attempt to apply
                 questions in university entrance examinations to the
                 evaluation of textual entailment recognition. Questions
                 in several fields, such as history and politics,
                 primarily test the examinee's knowledge in the form of
                 choosing true statements from multiple choices.
                 Answering such questions can be regarded as equivalent
                 to finding evidential texts from a textbase such as
                 textbooks and Wikipedia. Therefore, this task can be
                 recast as recognizing textual entailment between a
                 description in a textbase and a statement given in a
                 question. We focused on the National Center Test for
                 University Admission in Japan and converted questions
                 into the evaluation data for textual entailment
                 recognition by using Wikipedia as a textbase.
                 Consequently, it is revealed that nearly half of the
                 questions can be mapped into textual entailment
                 recognition; 941 text pairs were created from 404
                 questions from six subjects. This data set is provided
                 for a subtask of NTCIR RITE (Recognizing Inference in
                 Text), and 16 systems from six teams used the data set
                 for evaluation. The evaluation results revealed that
                 the best system achieved a correct answer ratio of
                 56\%, which is significantly better than a random
                 choice baseline.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Pham:2012:LRT,
  author =       "Minh Quang Nhat Pham and Minh Le Nguyen and Akira
                 Shimazu",
  title =        "Learning to Recognize Textual Entailment in {Japanese}
                 Texts with the Utilization of Machine Translation",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382596",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Recognizing Textual Entailment (RTE) is a fundamental
                 task in Natural Language Understanding. The task is to
                 decide whether the meaning of a text can be inferred
                 from the meaning of another one. In this article, we
                 conduct an empirical study of recognizing textual
                 entailment in Japanese texts, in which we adopt a
                 machine learning-based approach to the task. We
                 quantitatively analyze the effects of various
                 entailment features, machine learning algorithms, and
                 the impact of RTE resources on the performance of an
                 RTE system. This article also investigates the use of
                 machine translation for the RTE task and determines
                 whether machine translation can be used to improve the
                 performance of our RTE system. Experimental results
                 achieved on benchmark data sets show that our machine
                 learning-based RTE system outperforms the baseline
                 methods based on lexical matching and syntactic
                 matching. The results also suggest that the machine
                 translation component can be utilized to improve the
                 performance of the RTE system.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Qiu:2012:RIT,
  author =       "Xipeng Qiu and Ling Cao and Zhao Liu and Xuanjing
                 Huang",
  title =        "Recognizing Inference in Texts with {Markov} Logic
                 Networks",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382597",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Recognizing inference in texts (RITE) attracts growing
                 attention of natural language processing (NLP)
                 researchers in recent years. In this article, we
                 propose a novel approach to recognize inference with
                 probabilistic logical reasoning. Our approach is built
                 on Markov logic networks (MLNs) framework, which is a
                 probabilistic extension of first-order logic. We design
                 specific semantic rules based on the surface,
                 syntactic, and semantic representations of texts, and
                 map these rules to logical representations. We also
                 extract information from some knowledge bases as common
                 sense logic rules. Then we utilize MLNs framework to
                 make predictions with combining statistical and logical
                 reasoning. Experiment results show that our system can
                 achieve better performance than state-of-the-art RITE
                 systems.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Shibata:2012:PAS,
  author =       "Tomohide Shibata and Sadao Kurohashi",
  title =        "Predicate-Argument Structure-Based Textual Entailment
                 Recognition System Exploiting Wide-Coverage Lexical
                 Knowledge",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382598",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article proposes a predicate-argument structure
                 based Textual Entailment Recognition system exploiting
                 wide-coverage lexical knowledge. Different from
                 conventional machine learning approaches where several
                 features obtained from linguistic analysis and
                 resources are utilized, our proposed method regards a
                 predicate-argument structure as a basic unit, and
                 performs the matching/alignment between a text and
                 hypothesis. In matching between predicate-arguments,
                 wide-coverage relations between words/phrases such as
                 synonym and is-a are utilized, which are automatically
                 acquired from a dictionary, Web corpus, and
                 Wikipedia.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Shih:2012:VCT,
  author =       "Chengwei Shih and Chengwei Lee and Richard Tzonghan
                 Tsai and Wenlian Hsu",
  title =        "Validating Contradiction in Texts Using Online
                 Co-Mention Pattern Checking",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382599",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Detecting contradictive statements is a foundational
                 and challenging task for text understanding
                 applications such as textual entailment. In this
                 article, we aim to address the problem of the shortage
                 of specific background knowledge in contradiction
                 detection. A novel contradiction detecting approach
                 based on the distribution of the query composed of
                 critical mismatch combinations on the Internet is
                 proposed to tackle the problem. By measuring the
                 availability of mismatch conjunction phrases (MCPs),
                 the background knowledge about two target statements
                 can be implicitly obtained for identifying
                 contradictions. Experiments on three different
                 configurations show that the MCP-based approach
                 achieves remarkable improvement on contradiction
                 detection and can significantly improve the performance
                 of textual entailment recognition.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Watanabe:2012:LDL,
  author =       "Yotaro Watanabe and Junta Mizuno and Eric Nichols and
                 Katsuma Narisawa and Keita Nabeshima and Naoaki Okazaki
                 and Kentaro Inui",
  title =        "Leveraging Diverse Lexical Resources for Textual
                 Entailment Recognition",
  journal =      j-TALIP,
  volume =       "11",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382593.2382600",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Dec 6 07:40:55 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Since the problem of textual entailment recognition
                 requires capturing semantic relations between diverse
                 expressions of language, linguistic and world knowledge
                 play an important role. In this article, we explore the
                 effectiveness of different types of currently available
                 resources including synonyms, antonyms,
                 hypernym-hyponym relations, and lexical entailment
                 relations for the task of textual entailment
                 recognition. In order to do so, we develop an
                 entailment relation recognition system which utilizes
                 diverse linguistic analyses and resources to align the
                 linguistic units in a pair of texts and identifies
                 entailment relations based on these alignments. We use
                 the Japanese subset of the NTCIR-9 RITE-1 dataset for
                 evaluation and error analysis, conducting ablation
                 testing and evaluation on hand-crafted alignment gold
                 standard data to evaluate the contribution of
                 individual resources. Error analysis shows that
                 existing knowledge sources are effective for RTE, but
                 that their coverage is limited, especially for
                 domain-specific and other low-frequency expressions. To
                 increase alignment coverage on such expressions, we
                 propose a method of alignment inference that uses
                 syntactic and semantic dependency information to
                 identify likely alignments without relying on external
                 resources. Evaluation adding alignment inference to a
                 system using all available knowledge sources shows
                 improvements in both precision and recall of entailment
                 relation recognition.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Hao:2013:TPP,
  author =       "Tianyong Hao and Chunshen Zhu",
  title =        "Toward a Professional Platform for {Chinese} Character
                 Conversion",
  journal =      j-TALIP,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2425327.2425328",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Mar 2 09:25:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Increasing communication among Chinese-speaking
                 regions using respectively traditional and simplified
                 Chinese character systems has highlighted the
                 subtle-yet-extensive differences between the two
                 systems, which can lead to unexpected hindrance in
                 converting characters from one to the other. This
                 article proposes a new priority-based multi-data
                 resources management model, with a new algorithm called
                 Fused Conversion algorithm from Multi-Data resources
                 (FCMD), to ensure more context-sensitive, human
                 controllable, and thus more reliable conversions, by
                 drawing on reverse maximum matching, $n$-gram-based
                 statistical model and pattern-based learning and
                 matching. After parameter training on the Tagged
                 Chinese Gigaword corpus, its conversion precision
                 reaches 91.5\% in context-sensitive cases, the most
                 difficult part in the conversion, with an overall
                 precision rate at 99.8\%, a significant improvement
                 over the state-of-the-art models. The conversion
                 platform based on the model has extra features such as
                 data resource selection and $n$-grams self-learning
                 ability, providing a more sophisticated tool good
                 especially for high-end professional uses.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@article{Jiang:2013:LRC,
  author =       {Mike Tian-Jian Jiang and Tsung-Hsien Lee and Wen-Lian
                 Hsu},
  title =        {The Left and Right Context of a Word: Overlapping
                 {Chinese} Syllable Word Segmentation with Minimal
                 Context},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {1},
  pages =        {2:1--2:??},
  articleno =    {2},
  month =        mar,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2425327.2425329},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {Since a Chinese syllable can correspond to many
                 characters (homophones), the syllable-to-character
                 conversion task is quite challenging for Chinese
                 phonetic input methods (CPIM). There are usually two
                 stages in a CPIM: 1. segment the syllable sequence into
                 syllable words, and 2. select the most likely character
                 words for each syllable word. A CPIM usually assumes
                 that the input is a complete sentence, and evaluates
                 the performance based on a well-formed corpus. However,
                 in practice, most Pinyin users prefer progressive text
                 entry in several short chunks, mainly in one or two
                 words each (most Chinese words consist of two or more
                 characters). Short chunks do not provide enough
                 contexts to perform the best possible
                 syllable-to-character conversion, especially when a
                 chunk consists of overlapping syllable words. In such
                 cases, a conversion system often selects the boundary
                 of a word with the highest frequency. Short chunk input
                 is even more popular on platforms with limited
                 computing power, such as mobile phones. Based on the
                 observation that the relative strength of a word can be
                 quite different when calculated leftwards or
                 rightwards, we propose a simple division of the word
                 context into the left context and the right context.
                 Furthermore, we design a double ranking strategy for
                 each word to reduce the number of errors in Step 1. Our
                 strategy is modeled as the minimum feedback arc set
                 problem on bipartite tournament with approximate
                 solutions derived from genetic algorithm. Experiments
                 show that, compared to the frequency-based method (FBM)
                 (low memory and fast) and the conditional random fields
                 (CRF) model (larger memory and slower), our double
                 ranking strategy has the benefits of less memory and
                 low power requirement with competitive performance. We
                 believe a similar strategy could also be adopted to
                 disambiguate conflicting linguistic patterns
                 effectively.},
  bibdate =      {Sat Mar 2 09:25:42 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Bach:2013:TPF,
  author =       {Ngo Xuan Bach and Nguyen Le Minh and Tran Thi Oanh and
                 Akira Shimazu},
  title =        {A Two-Phase Framework for Learning Logical Structures
                 of Paragraphs in Legal Articles},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {1},
  pages =        {3:1--3:??},
  articleno =    {3},
  month =        mar,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2425327.2425330},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {Analyzing logical structures of texts is important to
                 understanding natural language, especially in the legal
                 domain, where legal texts have their own specific
                 characteristics. Recognizing logical structures in
                 legal texts does not only help people in understanding
                 legal documents, but also in supporting other tasks in
                 legal text processing. In this article, we present a
                 new task, learning logical structures of paragraphs in
                 legal articles, which is studied in research on Legal
                 Engineering. The goals of this task are recognizing
                 logical parts of law sentences in a paragraph, and then
                 grouping related logical parts into some logical
                 structures of formulas, which describe logical
                 relations between logical parts. We present a two-phase
                 framework to learn logical structures of paragraphs in
                 legal articles. In the first phase, we model the
                 problem of recognizing logical parts in law sentences
                 as a multi-layer sequence learning problem, and present
                 a CRF-based model to recognize them. In the second
                 phase, we propose a graph-based method to group logical
                 parts into logical structures. We consider the problem
                 of finding a subset of complete subgraphs in a
                 weighted-edge complete graph, where each node
                 corresponds to a logical part, and a complete subgraph
                 corresponds to a logical structure. We also present an
                 integer linear programming formulation for this
                 optimization problem. Our models achieve 74.37\% in
                 recognizing logical parts, 80.08\% in recognizing
                 logical structures, and 58.36\% in the whole task on
                 the Japanese National Pension Law corpus. Our work
                 provides promising results for further research on this
                 interesting task.},
  bibdate =      {Sat Mar 2 09:25:42 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Sundaram:2013:AFB,
  author =       {Suresh Sundaram and A. G. Ramakrishnan},
  title =        {Attention-Feedback Based Robust Segmentation of Online
                 Handwritten Isolated {Tamil} Words},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {1},
  pages =        {4:1--4:??},
  articleno =    {4},
  month =        mar,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2425327.2425331},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {In this article, we propose a lexicon-free,
                 script-dependent approach to segment online handwritten
                 isolated Tamil words into its constituent symbols. Our
                 proposed segmentation strategy comprises two modules,
                 namely the (1) Dominant Overlap Criterion Segmentation
                 (DOCS) module and (2) Attention Feedback Segmentation
                 (AFS) module. Based on a bounding box overlap criterion
                 in the DOCS module, the input word is first segmented
                 into stroke groups. A stroke group may at times
                 correspond to a part of a valid symbol
                 (over-segmentation) or a merger of valid symbols
                 (under-segmentation). Attention on specific features in
                 the AFS module serve in detecting possibly
                 over-segmented or under-segmented stroke groups.
                 Thereafter, feedbacks from the SVM classifier
                 likelihoods and stroke-group based features are
                 considered in modifying the suspected stroke groups to
                 form valid symbols. The proposed scheme is tested on a
                 set of 10000 isolated handwritten words (containing
                 53,246 Tamil symbols). The results show that the DOCS
                 module achieves a symbol-level segmentation accuracy of
                 98.1\%, which improves to as high as 99.7\% after the
                 AFS strategy. This in turn entails a symbol recognition
                 rate of 83.9\% (at the DOCS module) and 88.4\% (after
                 the AFS module). The resulting word recognition rates
                 at the DOCS and AFS modules are found to be, 50.9\% and
                 64.9\% respectively, without any postprocessing.},
  bibdate =      {Sat Mar 2 09:25:42 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Sun:2013:LAC,
  author =       {Xu Sun and Naoaki Okazaki and Jun'ichi Tsujii and
                 Houfeng Wang},
  title =        {Learning Abbreviations from {Chinese} and {English}
                 Terms by Modeling Non-Local Information},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {2},
  pages =        {5:1--5:??},
  articleno =    {5},
  month =        jun,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2461316.2461317},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {The present article describes a robust approach for
                 abbreviating terms. First, in order to incorporate
                 non-local information into abbreviation generation
                 tasks, we present both implicit and explicit solutions:
                 the latent variable model and the label encoding with
                 global information. Although the two approaches compete
                 with one another, we find they are also highly
                 complementary. We propose a combination of the two
                 approaches, and we will show the proposed method
                 outperforms all of the existing methods on abbreviation
                 generation datasets. In order to reduce computational
                 complexity of learning non-local information, we
                 further present an online training method, which can
                 arrive the objective optimum with accelerated training
                 speed. We used a Chinese newswire dataset and a English
                 biomedical dataset for experiments. Experiments
                 revealed that the proposed abbreviation generator with
                 non-local information achieved the best results for
                 both the Chinese and English languages.},
  bibdate =      {Thu Jun 6 06:48:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Hinkle:2013:DES,
  author =       {Lauren Hinkle and Albert Brouillette and Sujay Jayakar
                 and Leigh Gathings and Miguel Lezcano and Jugal
                 Kalita},
  title =        {Design and Evaluation of Soft Keyboards for {Brahmic}
                 Scripts},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {2},
  pages =        {6:1--6:??},
  articleno =    {6},
  month =        jun,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2461316.2461318},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {Despite being spoken by a large percentage of the
                 world, Indic languages in general lack user-friendly
                 and efficient methods for text input. These languages
                 have poor or no support for typing. Soft keyboards,
                 because of their ease of installation and lack of
                 reliance on specific hardware, are a promising solution
                 as an input device for many languages. Developing an
                 acceptable soft keyboard requires the frequency
                 analysis of characters in order to design a layout that
                 minimizes text-input time. This article proposes the
                 use of various development techniques, layout
                 variations, and evaluation methods for the creation of
                 soft keyboards for Brahmic scripts. We propose that
                 using optimization techniques such as genetic
                 algorithms and multi-objective Pareto optimization to
                 develop multi-layer keyboards will increase the speed
                 at which text can be entered.},
  bibdate =      {Thu Jun 6 06:48:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Fujita:2013:WSD,
  author =       {Sanae Fujita and Akinori Fujino},
  title =        {Word Sense Disambiguation by Combining Labeled Data
                 Expansion and Semi-Supervised Learning Method},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {2},
  pages =        {7:1--7:??},
  articleno =    {7},
  month =        jun,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2461316.2461319},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {Lack of labeled data is one of the severest problems
                 facing word sense disambiguation (WSD). We overcome the
                 problem by proposing a method that combines automatic
                 labeled data expansion (Step 1) and semi-supervised
                 learning (Step 2). The Step 1 and 2 methods are both
                 effective, but their combination yields a synergistic
                 effect. In this article, in Step 1, we automatically
                 extract reliable labeled data from raw corpora using
                 dictionary example sentences, even the infrequent and
                 unseen senses (which are not likely to appear in
                 labeled data). Next, in Step 2, we apply a
                 semi-supervised classifier and achieve an improvement
                 using easy-to-get unlabeled data. In this step, we also
                 show that we can guess even unseen senses. We target a
                 SemEval-2010 Japanese WSD task, which is a lexical
                 sample task. Both Step 1 and Step 2 methods performed
                 better than the best published result (76.4 \%).
                 Furthermore, the combined method achieved much higher
                 accuracy (84.2 \%). In this experiment, up to 50 \% of
                 unseen senses are classified correctly. However, the
                 number of unseen senses are small, therefore, we delete
                 one senses per word and apply our proposed method; the
                 results show that the method is effective and robust
                 even for unseen senses.},
  bibdate =      {Thu Jun 6 06:48:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sproat:2013:EGN,
  author =       "Richard Sproat",
  title =        "Editorial Greetings from the new {Editor-in-Chief}",
  journal =      j-TALIP,
  volume =       "12",
  number =       "3",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499955.2499956",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Aug 19 18:39:55 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@article{Fukunishi:2013:BAA,
  author =       {Takaaki Fukunishi and Andrew Finch and Seiichi
                 Yamamoto and Eiichiro Sumita},
  title =        {A {Bayesian} Alignment Approach to Transliteration
                 Mining},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {3},
  pages =        {9:1--9:??},
  articleno =    {9},
  month =        aug,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2499955.2499957},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {In this article we present a technique for mining
                 transliteration pairs using a set of simple features
                 derived from a many-to-many bilingual forced-alignment
                 at the grapheme level to classify candidate
                 transliteration word pairs as correct transliterations
                 or not. We use a nonparametric Bayesian method for the
                 alignment process, as this process rewards the reuse of
                 parameters, resulting in compact models that align in a
                 consistent manner and tend not to over-fit. Our
                 approach uses the generative model resulting from
                 aligning the training data to force-align the test
                 data. We rely on the simple assumption that correct
                 transliteration pairs would be well modeled and
                 generated easily, whereas incorrect pairs---being more
                 random in character---would be more costly to model and
                 generate. Our generative model generates by
                 concatenating bilingual grapheme sequence pairs. The
                 many-to-many generation process is essential for
                 handling many languages with non-Roman scripts, and it
                 is hard to train well using a maximum likelihood
                 techniques, as these tend to over-fit the data. Our
                 approach works on the principle that generation using
                 only grapheme sequence pairs that are in the model
                 results in a high probability derivation, whereas if
                 the model is forced to introduce a new parameter in
                 order to explain part of the candidate pair, the
                 derivation probability is substantially reduced and
                 severely reduced if the new parameter corresponds to a
                 sequence pair composed of a large number of graphemes.
                 The features we extract from the alignment of the test
                 data are not only based on the scores from the
                 generative model, but also on the relative proportions
                 of each sequence that are hard to generate. The
                 features are used in conjunction with a support vector
                 machine classifier trained on known positive examples
                 together with synthetic negative examples to determine
                 whether a candidate word pair is a correct
                 transliteration pair. In our experiments, we used all
                 data tracks from the 2010 Named-Entity Workshop
                 (NEWS'10) and use the performance of the best system
                 for each language pair as a reference point. Our
                 results show that the new features we propose are
                 powerfully predictive, enabling our approach to achieve
                 levels of performance on this task that are comparable
                 to the state of the art.},
  bibdate =      {Mon Aug 19 18:39:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Iwakura:2013:NER,
  author =       {Tomoya Iwakura and Hiroya Takamura and Manabu
                 Okumura},
  title =        {A Named Entity Recognition Method Based on
                 Decomposition and Concatenation of Word Chunks},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {3},
  pages =        {10:1--10:??},
  articleno =    {10},
  month =        aug,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2499955.2499958},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {We propose a named entity (NE) recognition method in
                 which word chunks are repeatedly decomposed and
                 concatenated. Our method identifies word chunks with a
                 base chunker, such as a noun phrase chunker, and then
                 recognizes NEs from the recognized word chunk
                 sequences. By using word chunks, we can obtain features
                 that cannot be obtained in word-sequence-based
                 recognition methods, such as the first word of a word
                 chunk, the last word of a word chunk, and so on.
                 However, each word chunk may include a part of an NE or
                 multiple NEs. To solve this problem, we use the
                 following operators: SHIFT for separating the first
                 word from a word chunk, POP for separating the last
                 word from a word chunk, JOIN for concatenating two word
                 chunks, and REDUCE for assigning an NE label to a word
                 chunk. We evaluate our method on a Japanese NE
                 recognition dataset that includes about 200,000
                 annotations of 191 types of NEs from over 8,500 news
                 articles. The experimental results show that the
                 training and processing speeds of our method are faster
                 than those of a linear-chain structured perceptron and
                 a semi-Markov perceptron, while maintaining high
                 accuracy.},
  bibdate =      {Mon Aug 19 18:39:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@Article{Izumi:2013:NCF,
  author =       "Tomoko Izumi and Kenji Imamura and Taichi Asami and
                 Kuniko Saito and Genichiro Kikui and Satoshi Sato",
  title =        "Normalizing Complex Functional Expressions in
                 {Japanese} Predicates: Linguistically-Directed
                 Rule-Based Paraphrasing and Its Application",
  journal =      j-TALIP,
  volume =       "12",
  number =       "3",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499955.2499959",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Aug 19 18:39:55 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The growing need for text mining systems, such as
                 opinion mining, requires a deep semantic understanding
                 of the target language. In order to accomplish this,
                 extracting the semantic information of functional
                 expressions plays a crucial role, because functional
                 expressions such as ``would like to'' and ``can't'' are key
                 expressions to detecting customers' needs and wants.
                 However, in Japanese, functional expressions appear in
                 the form of suffixes, and two different types of
                 functional expressions are merged into one predicate:
                 one influences the factual meaning of the predicate
                 while the other is merely used for discourse purposes.
                 This triggers an increase in surface forms, which
                 hinders information extraction systems. In this
                 article, we present a novel normalization technique
                 that paraphrases complex functional expressions into
                 simplified forms that retain only the crucial meaning
                 of the predicate. We construct paraphrasing rules based
                 on linguistic theories in syntax and semantics. The
                 results of experiments indicate that our system
                 achieves a high accuracy of 79.7\%, while it reduces
                 the differences in functional expressions by up to
                 66.7\%. The results also show an improvement in the
                 performance of predicate extraction, providing
                 encouraging evidence of the usability of paraphrasing
                 as a means of normalizing different language
                 expressions.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@article{Sudoh:2013:SBP,
  author =       {Katsuhito Sudoh and Xianchao Wu and Kevin Duh and
                 Hajime Tsukada and Masaaki Nagata},
  title =        {Syntax-Based Post-Ordering for Efficient
                 {Japanese-to-English} Translation},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {3},
  pages =        {12:1--12:??},
  articleno =    {12},
  month =        aug,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2499955.2499960},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  abstract =     {This article proposes a novel reordering method for
                 efficient two-step Japanese-to-English statistical
                 machine translation (SMT) that isolates reordering from
                 SMT and solves it after lexical translation. This
                 reordering problem, called post-ordering, is solved as
                 an SMT problem from Head-Final English (HFE) to
                 English. HFE is syntax-based reordered English that is
                 very successfully used for reordering with
                 English-to-Japanese SMT. The proposed method
                 incorporates its advantage into the reverse direction,
                 Japanese-to-English, and solves the post-ordering
                 problem by accurate syntax-based SMT with target
                 language syntax. Two-step SMT with the proposed
                 post-ordering empirically reduces the decoding time of
                 the accurate but slow syntax-based SMT by its good
                 approximation using intermediate HFE. The proposed
                 method improves the decoding speed of syntax-based SMT
                 decoding by about six times with comparable translation
                 accuracy in Japanese-to-English patent translation
                 experiments.},
  bibdate =      {Mon Aug 19 18:39:55 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@article{Sproat:2013:TP,
  author =       {Richard Sproat},
  title =        {{TALIP} Perspectives},
  journal =      j-TALIP,
  fjournal =     {ACM Transactions on Asian Language Information
                 Processing},
  volume =       {12},
  number =       {4},
  pages =        {13:1--13:??},
  articleno =    {13},
  month =        oct,
  year =         {2013},
  doi =          {https://doi.org/10.1145/2523057.2523058},
  issn =         {1530-0226 (print), 1558-3430 (electronic)},
  issn-l =       {1530-0226},
  coden =        {????},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J820},
  bibdate =      {Wed Oct 30 12:33:24 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
}

@Article{Paul:2013:HCB,
  author =       "Michael Paul and Andrew Finch and Eiichiro Sumita",
  title =        "How to Choose the Best Pivot Language for Automatic
                 Translation of Low-Resource Languages",
  journal =      j-TALIP,
  volume =       "12",
  number =       "4",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2505126",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Wed Oct 30 12:33:24 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Recent research on multilingual statistical machine
                 translation focuses on the usage of pivot languages in
                 order to overcome language resource limitations for
                 certain language pairs. Due to the richness of
                 available language resources, English is, in general,
                 the pivot language of choice. However, factors like
                 language relatedness can also affect the choice of the
                 pivot language for a given language pair, especially
                 for Asian languages, where language resources are
                 currently quite limited. In this article, we provide
                 new insights into what factors make a pivot language
                 effective and investigate the impact of these factors
                 on the overall pivot translation performance for
                 translation between 22 Indo-European and Asian
                 languages. Experimental results using state-of-the-art
                 statistical machine translation techniques revealed
                 that the translation quality of 54.8\% of the language
                 pairs improved when a non-English pivot language was
                 chosen. Moreover, 81.0\% of system performance
                 variations can be explained by a combination of factors
                 such as language family, vocabulary, sentence length,
                 language perplexity, translation model entropy,
                 reordering, monotonicity, and engine performance.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Huang:2013:CAT,
  author          = {Chung-Chi Huang and Mei-Hua Chen and Ping-Che Yang and
                     Jason S. Chang},
  title           = {A Computer-Assisted Translation and Writing System},
  journal         = j-TALIP,
  volume          = 12,
  number          = 4,
  pages           = {15:1--15:??},
  month           = oct,
  year            = 2013,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2505984},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Wed Oct 30 12:33:24 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {We introduce a method for learning to predict text and
                     grammatical construction in a computer-assisted
                     translation and writing framework. In our approach,
                     predictions are offered on the fly to help the user
                     make appropriate lexical and grammar choices during the
                     translation of a source text, thus improving
                     translation quality and productivity. The method
                     involves automatically generating general-to-specific
                     word usage summaries (i.e., writing suggestion module),
                     and automatically learning high-confidence word- or
                     phrase-level translation equivalents (i.e., translation
                     suggestion module). At runtime, the source text and its
                     translation prefix entered by the user are broken down
                     into $n$-grams to generate grammar and translation
                     predictions, which are further combined and ranked via
                     translation and language models. These ranked
                     prediction candidates are iteratively and interactively
                     displayed to the user in a pop-up menu as translation
                     or writing hints. We present a prototype writing
                     assistant, TransAhead, that applies the method to a
                     human-computer collaborative environment. Automatic and
                     human evaluations show that novice translators or
                     language learners substantially benefit from our system
                     in terms of translation performance (i.e., translation
                     accuracy and productivity) and language learning (i.e.,
                     collocation usage and grammar). In general, our
                     methodology of inline grammar and text predictions or
                     suggestions has great potential in the field of
                     computer-assisted translation, writing, or even
                     language learning.},
  acknowledgement = ack-nhfb,
  articleno       = 15,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Chu:2013:CJM,
  author          = {Chenhui Chu and Toshiaki Nakazawa and Daisuke Kawahara
                     and Sadao Kurohashi},
  title           = {{Chinese--Japanese} Machine Translation Exploiting
                     {Chinese} Characters},
  journal         = j-TALIP,
  volume          = 12,
  number          = 4,
  pages           = {16:1--16:??},
  month           = oct,
  year            = 2013,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2523057.2523059},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Wed Oct 30 12:33:24 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {The Chinese and Japanese languages share Chinese
                     characters. Since the Chinese characters in Japanese
                     originated from ancient China, many common Chinese
                     characters exist between these two languages. Since
                     Chinese characters contain significant semantic
                     information and common Chinese characters share the
                     same meaning in the two languages, they can be quite
                     useful in Chinese--Japanese machine translation (MT).
                     We therefore propose a method for creating a Chinese
                     character mapping table for Japanese, traditional
                     Chinese, and simplified Chinese, with the aim of
                     constructing a complete resource of common Chinese
                     characters. Furthermore, we point out two main problems
                     in Chinese word segmentation for Chinese--Japanese MT,
                     namely, unknown words and word segmentation
                     granularity, and propose an approach exploiting common
                     Chinese characters to solve these problems. We also
                     propose a statistical method for detecting other
                     semantically equivalent Chinese characters other than
                     the common ones and a method for exploiting shared
                     Chinese characters in phrase alignment. Results of the
                     experiments carried out on a state-of-the-art
                     phrase-based statistical MT system and an example-based
                     MT system show that our proposed approaches can improve
                     MT performance significantly, thereby verifying the
                     effectiveness of shared Chinese characters for
                     Chinese--Japanese MT.},
  acknowledgement = ack-nhfb,
  articleno       = 16,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Goto:2013:POP,
  author          = {Isao Goto and Masao Utiyama and Eiichiro Sumita},
  title           = {Post-Ordering by Parsing with {ITG} for
                     {Japanese--English} Statistical Machine Translation},
  journal         = j-TALIP,
  volume          = 12,
  number          = 4,
  pages           = {17:1--17:??},
  month           = oct,
  year            = 2013,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2518100},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Wed Oct 30 12:33:24 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Word reordering is a difficult task for translation
                     between languages with widely different word orders,
                     such as Japanese and English. A previously proposed
                     post-ordering method for Japanese-to-English
                     translation first translates a Japanese sentence into a
                     sequence of English words in a word order similar to
                     that of Japanese, then reorders the sequence into an
                     English word order. We employed this post-ordering
                     framework and improved upon its reordering method. The
                     existing post-ordering method reorders the sequence of
                     English words via SMT, whereas our method reorders the
                     sequence by (1) parsing the sequence using ITG to
                     obtain syntactic structures which are similar to
                     Japanese syntactic structures, and (2) transferring the
                     obtained syntactic structures into English syntactic
                     structures according to the ITG. The experiments using
                     Japanese-to-English patent translation demonstrated the
                     effectiveness of our method and showed that both the
                     RIBES and BLEU scores were improved over compared
                     methods.},
  acknowledgement = ack-nhfb,
  articleno       = 17,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Strotgen:2014:TML,
  author          = {Jannik Str{\"o}tgen and Ayser Armiti and Tran Van Canh
                     and Julian Zell and Michael Gertz},
  title           = {Time for More Languages: Temporal Tagging of {Arabic},
                     {Italian}, {Spanish}, and {Vietnamese}},
  journal         = j-TALIP,
  volume          = 13,
  number          = 1,
  pages           = {1:1--1:??},
  month           = feb,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2540989},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Feb 27 12:18:55 MST 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Most of the research on temporal tagging so far is
                     done for processing English text documents. There are
                     hardly any multilingual temporal taggers supporting
                     more than two languages. Recently, the temporal tagger
                     HeidelTime has been made publicly available, supporting
                     the integration of new languages by developing
                     language-dependent resources without modifying the
                     source code. In this article, we describe our work on
                     developing such resources for two Asian and two Romance
                     languages: Arabic, Vietnamese, Spanish, and Italian.
                     While temporal tagging of the two Romance languages has
                     been addressed before, there has been almost no
                     research on Arabic and Vietnamese temporal tagging so
                     far. Furthermore, we analyze language-dependent
                     challenges for temporal tagging and explain the
                     strategies we followed to address them. Our evaluation
                     results on publicly available and newly annotated
                     corpora demonstrate the high quality of our new
                     resources for the four languages, which we make
                     publicly available to the research community.},
  acknowledgement = ack-nhfb,
  articleno       = 1,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Goto:2014:DMB,
  author =       "Isao Goto and Masao Utiyama and Eiichiro Sumita and
                 Akihiro Tamura and Sadao Kurohashi",
  title =        "Distortion Model Based on Word Sequence Labeling for
                 Statistical Machine Translation",
  journal =      j-TALIP,
  volume =       "13",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2537128",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Thu Feb 27 12:18:55 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article proposes a new distortion model for
                 phrase-based statistical machine translation. In
                 decoding, a distortion model estimates the source word
                 position to be translated next (subsequent position;
                 SP) given the last translated source word position
                 (current position; CP). We propose a distortion model
                 that can simultaneously consider the word at the CP,
                 the word at an SP candidate, the context of the CP and
                 an SP candidate, relative word order among the SP
                 candidates, and the words between the CP and an SP
                 candidate. These considered elements are called rich
                 context. Our model considers rich context by
                 discriminating label sequences that specify spans from
                 the CP to each SP candidate. It enables our model to
                 learn the effect of relative word order among SP
                 candidates as well as to learn the effect of distances
                 from the training data. In contrast to the learning
                 strategy of existing methods, our learning strategy is
                 that the model learns preference relations among SP
                 candidates in each sentence of the training data. This
                 learning strategy enables consideration of all of the
                 rich context simultaneously. In our experiments, our
                 model had higher BLEU and RIBES scores for
                 Japanese--English, Chinese--English, and German--English
                 translation compared to the lexical reordering
                 models.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
}

@Article{Kim:2014:CLA,
  author          = {Seokhwan Kim and Minwoo Jeong and Jonghoon Lee and
                     Gary Geunbae Lee},
  title           = {Cross-Lingual Annotation Projection for
                     Weakly-Supervised Relation Extraction},
  journal         = j-TALIP,
  volume          = 13,
  number          = 1,
  pages           = {3:1--3:??},
  month           = feb,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2529994},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Feb 27 12:18:55 MST 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Although researchers have conducted extensive studies
                     on relation extraction in the last decade, statistical
                     systems based on supervised learning are still limited,
                     because they require large amounts of training data to
                     achieve high performance level. In this article, we
                     propose cross-lingual annotation projection methods
                     that leverage parallel corpora to build a relation
                     extraction system for a resource-poor language without
                     significant annotation efforts. To make our method more
                     reliable, we introduce two types of projection
                     approaches with noise reduction strategies. We
                     demonstrate the merit of our method using a Korean
                     relation extraction system trained on projected
                     examples from an English-Korean parallel corpus.
                     Experiments show the feasibility of our approaches
                     through comparison to other systems based on
                     monolingual resources.},
  acknowledgement = ack-nhfb,
  articleno       = 3,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Yahya:2014:ATC,
  author          = {Adnan Yahya and Ali Salhi},
  title           = {{Arabic} Text Categorization Based on {Arabic
                     Wikipedia}},
  journal         = j-TALIP,
  volume          = 13,
  number          = 1,
  pages           = {4:1--4:??},
  month           = feb,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2537129},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Feb 27 12:18:55 MST 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {This article describes an algorithm for categorizing
                     Arabic text, relying on highly categorized corpus-based
                     datasets obtained from the Arabic Wikipedia by using
                     manual and automated processes to build and customize
                     categories. The categorization algorithm was built by
                     adopting a simple categorization idea then moving
                     forward to more complex ones. We applied tests and
                     filtration criteria to reach the best and most
                     efficient results that our algorithm can achieve. The
                     categorization depends on the statistical relations
                     between the input (test) text and the reference
                     (training) data supported by well-defined
                     Wikipedia-based categories. Our algorithm supports two
                     levels for categorizing Arabic text; categories are
                     grouped into a hierarchy of main categories and
                     subcategories. This introduces a challenge due to the
                     correlation between certain subcategories and overlap
                     between main categories. We argue that our algorithm
                     achieved good performance compared to other methods
                     reported in the literature.},
  acknowledgement = ack-nhfb,
  articleno       = 4,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Church:2014:TPG,
  author          = {Kenneth Church},
  title           = {{TALIP} Perspectives, Guest Editorial Commentary: What
                     Counts (and What Ought to Count)?},
  journal         = j-TALIP,
  volume          = 13,
  number          = 1,
  pages           = {5:1--5:??},
  month           = feb,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2559789},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Thu Feb 27 12:18:55 MST 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  articleno       = 5,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Sulaiman:2014:EJS,
  author          = {Suliana Sulaiman and Khairuddin Omar and Nazlia Omar
                     and Mohd Zamri Murah and Hamdan Abdul Rahman},
  title           = {The Effectiveness of a {Jawi} Stemmer for Retrieving
                     Relevant {Malay} Documents in {Jawi} Characters},
  journal         = j-TALIP,
  volume          = 13,
  number          = 2,
  pages           = {6:1--6:??},
  month           = jun,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2540988},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Jun 20 18:22:19 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {The Malay language has two types of writing script,
                     known as Rumi and Jawi. Most previous stemmer results
                     have reported on Malay Rumi characters and only a few
                     have tested Jawi characters. In this article, a new
                     Jawi stemmer has been proposed and tested for document
                     retrieval. A total of 36 queries and datasets from the
                     transliterated Jawi Quran were used. The experiment
                     shows that the mean average precision for a ``stemmed
                     Jawi'' document is 8.43\%. At the same time, the mean
                     average precision for a ``nonstemmed Jawi'' document is
                     5.14\%. The result from a paired sample t-test showed
                     that the use of a ``stemmed Jawi'' document increased
                     the precision in document retrieval. Further
                     experiments were performed to examine the precision of
                     the relevant documents that were retrieved at various
                     cutoff points for all 36 queries. The results for the
                     ``stemmed Jawi'' document showed a significantly
                     different start, at a cutoff of 40, compared with the
                     ``nonstemmed Jawi'' documents. This result shows the
                     usefulness of a Jawi stemmer for retrieving relevant
                     documents in the Jawi script.},
  acknowledgement = ack-nhfb,
  articleno       = 6,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Esmaili:2014:TKI,
  author          = {Kyumars Sheykh Esmaili and Shahin Salavati and
                     Anwitaman Datta},
  title           = {Towards {Kurdish} Information Retrieval},
  journal         = j-TALIP,
  volume          = 13,
  number          = 2,
  pages           = {7:1--7:??},
  month           = jun,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2556948},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Jun 20 18:22:19 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {The Kurdish language is an Indo-European language
                     spoken in Kurdistan, a large geographical region in the
                     Middle East. Despite having a large number of speakers,
                     Kurdish is among the less-resourced languages and has
                     not seen much attention from the IR and NLP research
                     communities. This article reports on the outcomes of a
                     project aimed at providing essential resources for
                     processing Kurdish texts. A principal output of this
                     project is Pewan, the first standard Test Collection to
                     evaluate Kurdish Information Retrieval systems. The
                     other language resources that we have built include a
                     lightweight stemmer and a list of stopwords. Our second
                     principal contribution is using these newly-built
                     resources to conduct a thorough experimental study on
                     Kurdish documents. Our experimental results show that
                     normalization, and to a lesser extent, stemming, can
                     greatly improve the performance of Kurdish IR
                     systems.},
  acknowledgement = ack-nhfb,
  articleno       = 7,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Sharma:2014:WPS,
  author          = {Manoj Kumar Sharma and Debasis Samanta},
  title           = {Word Prediction System for Text Entry in {Hindi}},
  journal         = j-TALIP,
  volume          = 13,
  number          = 2,
  pages           = {8:1--8:??},
  month           = jun,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2617590},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Jun 20 18:22:19 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/spell.bib;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {Word prediction is treated as an efficient technique
                     to enhance text entry rate. Existing word prediction
                     systems predict a word when a user correctly enters the
                     initial few characters of the word. In fact, a word
                     prediction system fails if the user makes errors in the
                     initial input. Therefore, there is a need to develop a
                     word prediction system that predicts desired words
                     while coping with errors in initial entries. This
                     requirement is more relevant in the case of text entry
                     in Indian languages, which are involved with a large
                     set of alphabets, words with complex characters and
                     inflections, phonetically similar sets of characters,
                     etc. In fact, text composition in Indian languages
                     involves frequent spelling errors, which presents a
                     challenge to develop an efficient word prediction
                     system. In this article, we address this problem and
                     propose a novel word prediction system. Our proposed
                     approach has been tried with Hindi, the national
                     language of India. Experiments with users substantiate
                     43.77\% keystroke savings, 92.49\% hit rate, and
                     95.82\% of prediction utilization with the proposed
                     word prediction system. Our system also reduces the
                     spelling error by 89.75\%.},
  acknowledgement = ack-nhfb,
  articleno       = 8,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Keskes:2014:SAT,
  author          = {Iskandar Keskes and Farah Benamara Zitoune and Lamia
                     Hadrich Belguith},
  title           = {Splitting {Arabic} Texts into Elementary Discourse
                     Units},
  journal         = j-TALIP,
  volume          = 13,
  number          = 2,
  pages           = {9:1--9:??},
  month           = jun,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2601401},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Jun 20 18:22:19 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {In this article, we propose the first work that
                     investigates the feasibility of Arabic discourse
                     segmentation into elementary discourse units within the
                     segmented discourse representation theory framework. We
                     first describe our annotation scheme that defines a set
                     of principles to guide the segmentation process. Two
                     corpora have been annotated according to this scheme:
                     elementary school textbooks and newspaper documents
                     extracted from the syntactically annotated Arabic
                     Treebank. Then, we propose a multiclass supervised
                     learning approach that predicts nested units. Our
                     approach uses a combination of punctuation,
                     morphological, lexical, and shallow syntactic features.
                     We investigate how each feature contributes to the
                     learning process. We show that an extensive
                     morphological analysis is crucial to achieve good
                     results in both corpora. In addition, we show that
                     adding chunks does not boost the performance of our
                     system.},
  acknowledgement = ack-nhfb,
  articleno       = 9,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Rubin:2014:TPG,
  author          = {Victoria L. Rubin},
  title           = {{TALIP} Perspectives, Guest Editorial Commentary:
                     Pragmatic and Cultural Considerations for Deception
                     Detection in {Asian} Languages},
  journal         = j-TALIP,
  volume          = 13,
  number          = 2,
  pages           = {10:1--10:??},
  month           = jun,
  year            = 2014,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2605292},
  ISSN            = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L          = {1530-0226},
  bibdate         = {Fri Jun 20 18:22:19 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/talip.bib},
  abstract        = {In hopes of sparking a discussion, I argue for much
                     needed research on automated deception detection in
                     Asian languages. The task of discerning truthful texts
                     from deceptive ones is challenging, but a logical
                     sequel to opinion mining. I suggest that applied
                     computational linguists pursue broader
                     interdisciplinary research on cultural differences and
                     pragmatic use of language in Asian cultures, before
                     turning to detection methods based on a primarily
                     Western (English-centric) worldview. Deception is
                     fundamentally human, but how do various cultures
                     interpret and judge deceptive behavior?},
  acknowledgement = ack-nhfb,
  articleno       = 10,
  fjournal        = {ACM Transactions on Asian Language Information
                     Processing},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J820},
}

@Article{Na:2014:LAN,
  author =       "Hwidong Na and Jong-Hyeok Lee",
  title =        "Linguistic analysis of non-{ITG} word reordering
                 between language pairs with different word order
                 typologies",
  journal =      j-TALIP,
  volume =       "13",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2644810",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Oct 4 06:09:41 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "The Inversion Transduction Grammar (ITG) constraints
                 have been widely used for word reordering in machine
                 translation studies. They are, however, so restricted
                 that some types of word reordering cannot be handled
                 properly. We analyze three corpora between SVO and SOV
                 languages: Chinese--Korean, English--Japanese, and
                 English--Korean. In our analysis, sentences that require
                 non-ITG word reordering are manually categorized. We
                 also report the results for two quantitative measures
                 that reveal the significance of non-ITG word
                 reordering. In conclusion, we suggest that ITG
                 constraints are insufficient to deal with word
                 reordering in real situations.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J820",
}

%%% NOTE(review): BibTeX parses the first author "Bharath A." as given
%%% name "Bharath" and surname "A." (hence the citation-label prefix
%%% "A:").  Confirm against the published byline that this is the
%%% intended name order before changing the author field.
@Article{A:2014:AMO,
  author =       "Bharath A. and Sriganesh Madhvanath",
  title =        "Allograph modeling for online handwritten characters
                 in {Devanagari} using constrained stroke clustering",
  journal =      j-TALIP,
  volume =       "13",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629622",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Oct 4 06:09:41 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Writer-specific character writing variations such as
                 those of stroke order and stroke number are an
                 important source of variability in the input when
                 handwriting is captured ``online'' via a stylus and a
                 challenge for robust online recognition of handwritten
                 characters and words. It has been shown by several
                 studies that explicit modeling of character allographs
                 is important for achieving high recognition accuracies
                 in a writer-independent recognition system. While
                 previous approaches have relied on unsupervised
                 clustering at the character or stroke level to find the
                 allographs of a character, in this article we propose
                 the use of constrained clustering using automatically
                 derived domain constraints to find a minimal set of
                 stroke clusters. The allographs identified have been
                 applied to Devanagari character recognition using
                 Hidden Markov Models and Nearest Neighbor classifiers,
                 and the results indicate substantial improvement in
                 recognition accuracy and/or reduction in memory and
                 computation time when compared to alternate modeling
                 techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J820",
}

@Article{Paik:2014:IBF,
  author =       "Jiaul H. Paik and Dipasree Pal and Swapan K. Parui",
  title =        "Incremental blind feedback: an effective approach to
                 automatic query expansion",
  journal =      j-TALIP,
  volume =       "13",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611521",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Oct 4 06:09:41 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Automatic query expansion (AQE) is a useful technique
                 for enhancing the effectiveness of information
                 retrieval systems. In this article, we propose a novel
                 AQE algorithm which first adopts a systematic
                 incremental approach to choose feedback documents from
                 the top retrieved set and then selects the expansion
                 terms aggregating the scores from each feedback set. We
                 also devise a term selection measure and a number of
                 weighting schemes based on easily computable features.
                 A set of experiments with a large number of standard
                 test collections reveals that the proposed incremental
                 blind feedback algorithm outperforms a number of
                 state-of-the-art query expansion methods with
                 remarkable significance and consistency.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J820",
}

@Article{Saharia:2014:SRP,
  author =       "Navanath Saharia and Utpal Sharma and Jugal Kalita",
  title =        "Stemming resource-poor {Indian} languages",
  journal =      j-TALIP,
  volume =       "13",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629670",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Oct 4 06:09:41 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Stemming is a basic method for morphological
                 normalization of natural language texts. In this study,
                 we focus on the problem of stemming several
                 resource-poor languages from Eastern India, viz.,
                 Assamese, Bengali, Bishnupriya Manipuri and Bodo. While
                 Assamese, Bengali and Bishnupriya Manipuri are
                 Indo-Aryan, Bodo is a Tibeto-Burman language. We design
                 a rule-based approach to remove suffixes from words. To
                 reduce over-stemming and under-stemming errors, we
                 introduce a dictionary of frequent words. We observe
                 that, for these languages a dominant amount of suffixes
                 are single letters creating problems during suffix
                 stripping. As a result, we introduce an HMM-based
                 hybrid approach to classify the mis-matched last
                 character. For each word, the stem is extracted by
                 calculating the most probable path in four HMM states.
                 At each step we measure the stemming accuracy for each
                 language. We obtain 94\% accuracy for Assamese and
                 Bengali and 87\%, and 82\% for Bishnupriya Manipuri and
                 Bodo, respectively, using the hybrid approach. We
                 compare our work with Morfessor [Creutz and Lagus
                 2005]. As of now, there is no reported work on stemming
                 for Bishnupriya Manipuri and Bodo. Our results on
                 Assamese and Bengali show significant improvement over
                 prior published work [Sarkar and Bandyopadhyay 2008;
                 Sharma et al. 2002, 2003].",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J820",
}

@Article{Sproat:2014:SJ,
  author =       "Richard Sproat",
  title =        "The state of the journal",
  journal =      j-TALIP,
  volume =       "13",
  number =       "3",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656620",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Sat Oct 4 06:09:41 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J820",
}

@Article{Bang:2014:PVP,
  author = {Jeesoo Bang and Jonghoon Lee and Gary Geunbae Lee and
    Minhwa Chung},
  title = {Pronunciation Variants Prediction Method to Detect
    Mispronunciations by {Korean} Learners of {English}},
  journal = j-TALIP,
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  year = {2014},
  month = dec,
  volume = {13},
  number = {4},
  articleno = {16},
  pages = {16:1--16:??},
  DOI = {https://doi.org/10.1145/2629545},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  CODEN = {????},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J820},
  bibdate = {Wed Jan 7 15:23:49 MST 2015},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article presents an approach to nonnative
    pronunciation variants modeling and prediction. The
    pronunciation variants prediction method was developed
    by generalized transformation-based error-driven
    learning (GTBL). The modified goodness of pronunciation
    (GOP) score was applied to effective mispronunciation
    detection using logistic regression machine learning
    under the pronunciation variants prediction.
    English-read speech data uttered by Korean-speaking
    learners of English were collected, then pronunciation
    variation knowledge was extracted from the differences
    between the canonical phonemes and the actual phonemes
    of the speech data. With this knowledge, an
    error-driven learning approach was designed that
    automatically learns phoneme variation rules from
    phoneme-level transcriptions. The learned rules
    generate an extended recognition network to detect
    mispronunciations. Three different mispronunciation
    detection methods were tested including our logistic
    regression machine learning method with modified GOP
    scores and mispronunciation preference features; all
    three methods yielded significant improvement in
    predictions of pronunciation variants, and our logistic
    regression method showed the best performance.},
}

@Article{Liu:2014:DTL,
  author = {Lemao Liu and Tiejun Zhao and Taro Watanabe and
    Hailong Cao and Conghui Zhu},
  title = {Discriminative Training for Log-Linear Based {SMT}:
    Global or Local Methods},
  journal = j-TALIP,
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  year = {2014},
  month = dec,
  volume = {13},
  number = {4},
  articleno = {17},
  pages = {17:1--17:??},
  DOI = {https://doi.org/10.1145/2637478},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  CODEN = {????},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J820},
  bibdate = {Wed Jan 7 15:23:49 MST 2015},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract = {In statistical machine translation, the standard
    methods such as MERT tune a single weight with regard
    to a given development data. However, these methods
    suffer from two problems due to the diversity and
    uneven distribution of source sentences. First, their
    performance is highly dependent on the choice of a
    development set, which may lead to an unstable
    performance for testing. Second, the sentence level
    translation quality is not assured since tuning is
    performed on the document level rather than on sentence
    level. In contrast with the standard global training in
    which a single weight is learned, we propose novel
    local training methods to address these two problems.
    We perform training and testing in one step by locally
    learning the sentence-wise weight for each input
    sentence. Since the time of each tuning step is
    unnegligible and learning sentence-wise weights for the
    entire test set means many passes of tuning, it is a
    great challenge for the efficiency of local training.
    We propose an efficient two-phase method to put the
    local training into practice by employing the
    ultraconservative update. On NIST Chinese-to-English
    translation tasks with both medium and large scales of
    training data, our local training methods significantly
    outperform standard methods with the maximal
    improvements up to 2.0 BLEU points, meanwhile their
    efficiency is comparable to that of the standard
    methods.},
}

@Article{Zhuang:2014:EPP,
  author = {Yi Zhuang and Qing Li and Dickson K. W. Chiu and
    Zhiang Wu and Haiyang Hu},
  title = {Efficient Personalized Probabilistic Retrieval of
    {Chinese} Calligraphic Manuscript Images in Mobile
    Cloud Environment},
  journal = j-TALIP,
  fjournal = {ACM Transactions on Asian Language Information
    Processing},
  year = {2014},
  month = dec,
  volume = {13},
  number = {4},
  articleno = {18},
  pages = {18:1--18:??},
  DOI = {https://doi.org/10.1145/2629575},
  ISSN = {1530-0226 (print), 1558-3430 (electronic)},
  ISSN-L = {1530-0226},
  CODEN = {????},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J820},
  bibdate = {Wed Jan 7 15:23:49 MST 2015},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/talip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Ancient language manuscripts constitute a key part of
    the cultural heritage of mankind. As one of the most
    important languages, Chinese historical calligraphy
    work has contributed to not only the Chinese cultural
    heritage but also the world civilization at large,
    especially for Asia. To support deeper and more
    convenient appreciation of Chinese calligraphy works,
    based on our previous work on the probabilistic
    retrieval of historical Chinese calligraphic character
    manuscripts repositories, we propose a system framework
    of the multi-feature-based Chinese calligraphic
    character images probabilistic retrieval in the mobile
    cloud network environment, which is called the DPRC. To
    ensure retrieval efficiency, we further propose four
    enabling techniques: (1) DRL-based probability
    propagation, (2) optimal data placement scheme, (3)
    adaptive data robust transmission algorithm, and (4)
    index support filtering scheme. Comprehensive
    experiments are conducted to testify the effectiveness
    and efficiency of our proposed DPRC method.},
}