%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.18",
%%%     date            = "02 July 2009",
%%%     time            = "14:21:33 MDT",
%%%     filename        = "talip.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "22155 3377 15180 138118",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "Asian language information processing,
%%%                        bibliography, BibTeX, TALIP",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Asian Language
%%%                        Information Processing (TALIP) (CODEN none,
%%%                        ISSN 1530-0226), which began publishing in
%%%                        March 2002.
%%%
%%%                        The journal has a World Wide Web site at
%%%
%%%                            http://www.acm.org/pubs/talip/
%%%                            http://portal.acm.org/browse_dl.cfm?&idx=J820
%%%
%%%                        At version 1.18, the year coverage looked
%%%                        like this:
%%%
%%%                             2002 (  15)    2005 (  17)    2008 (  13)
%%%                             2003 (  22)    2006 (  28)    2009 (   9)
%%%                             2004 (  17)    2007 (  14)
%%%
%%%                             Article:        135
%%%
%%%                             Total entries:  135
%%%
%%%                        This bibliography has been constructed
%%%                        primarily from the publisher Web site.
%%%
%%%                        Numerous errors in the sources noted above
%%%                        have been corrected.  Spelling has been
%%%                        verified with the UNIX spell and GNU ispell
%%%                        programs using the exception dictionary
%%%                        stored in the companion file with extension
%%%                        .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen as
%%%                        name:year:abbrev, where name is the family
%%%                        name of the first author or editor, year is a
%%%                        4-digit number, and abbrev is a 3-letter
%%%                        condensation of important title words.
%%%                        Citation labels were automatically generated
%%%                        by software developed for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, with the help of
%%%                        ``bibsort -byvolume''.  The bibsort utility
%%%                        is available from ftp.math.utah.edu in
%%%                        /pub/tex/bib.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================

%%% Placeholder for hyphenation exceptions; the list is currently empty.
@preamble{"\hyphenation{ }"}

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% Contact details of the bibliography maintainer, referenced from the
%%% acknowledgement field of every entry.
@string{ack-nhfb =
    "Nelson H. F. Beebe, University of Utah, Department of Mathematics,
     110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA,
     Tel: +1 801 581 5254, FAX: +1 801 581 4148,
     e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|,
     \path|beebe@computer.org| (Internet),
     URL: \path|http://www.math.utah.edu/~beebe/|"
}

%%% ====================================================================
%%% Journal abbreviations:

%%% Full journal title, referenced by the journal field of every entry.
@string{j-TALIP =
    {ACM Transactions on Asian Language Information Processing}
}

%%% ====================================================================
%%% Bibliography entries:

%%% Volume 1, number 1 (March 2002), pages 1--2.
@article{Wong:2002:P,
  author          = {Wong, Kam-Fai and Tsujii, Jun'ichi},
  title           = {Prologue},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {1--2},
  month           = mar,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 1 (March 2002), pages 3--33.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Gao:2002:TUA,
  author =       "Jianfeng Gao and Joshua Goodman and Mingjing Li and
                 Kai-Fu Lee",
  title =        "Toward a unified approach to statistical language
                 modeling for {Chinese}",
  journal =      j-TALIP,
  volume =       "1",
  number =       "1",
  pages =        "3--33",
  month =        mar,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/509900.509903",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:34 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 1 (March 2002), pages 34--64.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Lai:2002:MTE,
  author =       "Yu-Sheng Lai and Chung-Hsien Wu",
  title =        "Meaningful term extraction and discriminative term
                 selection in text categorization via unknown-word
                 methodology",
  journal =      j-TALIP,
  volume =       "1",
  number =       "1",
  pages =        "34--64",
  month =        mar,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/509900.509904",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:34 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 1 (March 2002), pages 65--82.
@article{Kim:2002:MBG,
  author          = {Kim, Byeongchang and Lee, Gary Geunbae and
                     Lee, Jong-Hyeok},
  title           = {Morpheme-based grapheme to phoneme conversion using
                     phonetic patterns and morphophonemic connectivity
                     information},
  journal         = j-TALIP,
  volume          = {1},
  number          = {1},
  pages           = {65--82},
  month           = mar,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Tue Nov 5 23:44:34 MST 2002},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 1 (March 2002), pages 83--102.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Lee:2002:UTI,
  author =       "Tan Lee and Wai Lau and Y. W. Wong and P. C. Ching",
  title =        "Using tone information in {Cantonese} continuous
                 speech recognition",
  journal =      j-TALIP,
  volume =       "1",
  number =       "1",
  pages =        "83--102",
  month =        mar,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/509900.509906",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:34 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 2 (June 2002), pages 103--122.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Chen:2002:BCE,
  author =       "Hsin-Hsi Chen and Chi-Ching Lin and Wen-Cheng Lin",
  title =        "Building a {Chinese-English} wordnet for translingual
                 applications",
  journal =      j-TALIP,
  volume =       "1",
  number =       "2",
  pages =        "103--122",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/568954.568955",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:36 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 2 (June 2002), pages 123--144.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Meng:2002:GPM,
  author =       "Helen Meng and Po-Chui Luk and Kui Xu and Fuliang
                 Weng",
  title =        "{GLR} parsing with multiple grammars for natural
                 language queries",
  journal =      j-TALIP,
  volume =       "1",
  number =       "2",
  pages =        "123--144",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/568954.568956",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:36 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 2 (June 2002), pages 145--158.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Murata:2002:CTM,
  author =       "Masaki Murata and Qing Ma and Hitoshi Isahara",
  title =        "Comparison of three machine-learning methods for
                 {Thai} part-of-speech tagging",
  journal =      j-TALIP,
  volume =       "1",
  number =       "2",
  pages =        "145--158",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/568954.568957",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:36 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 2 (June 2002), pages 159--172.
%%% The DOI is recorded bare (no resolver URL prefix) so that bibliography
%%% styles can build the link with whatever resolver they choose.
@Article{Lu:2002:TWQ,
  author =       "Wen-Hsiang Lu and Lee-Feng Chien and Hsi-Jian Lee",
  title =        "Translation of web queries using anchor text mining",
  journal =      j-TALIP,
  volume =       "1",
  number =       "2",
  pages =        "159--172",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  DOI =          "10.1145/568954.568958",
  ISSN =         "1530-0226",
  bibdate =      "Tue Nov 5 23:44:36 MST 2002",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 3 (September 2002), pages 173--206.
@article{Li:2002:WBA,
  author          = {Li, Wenjie and Wong, Kam-Fai},
  title           = {A word-based approach for modeling and discovering
                     temporal relations embedded in {Chinese} sentences},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {173--206},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 3 (September 2002), pages 207--224.
@article{Lee:2002:ACB,
  author          = {Lee, Jin-Seok and Kim, Byeongchang and
                     Lee, Gary Geunbae},
  title           = {Automatic corpus-based tone and break-index
                     prediction using {K-ToBI} representation},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {207--224},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 3 (September 2002), pages 225--268.
@article{Luk:2002:CCD,
  author          = {Luk, Robert W. P. and Kwok, K. L.},
  title           = {A comparison of {Chinese} document indexing
                     strategies and retrieval models},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {225--268},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 3 (September 2002), pages 269--278.
@article{Suzuki:2002:LCS,
  author          = {Suzuki, Izumi and Mikami, Yoshiki and Ohsato, Ario and
                     Chubachi, Yoshihide},
  title           = {A language and character set determination method
                     based on {N}-gram statistics},
  journal         = j-TALIP,
  volume          = {1},
  number          = {3},
  pages           = {269--278},
  month           = sep,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:00 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 4 (December 2002), pages 281--296.
@article{Jin:2002:CDC,
  author          = {Jin, Honglan and Wong, Kam-Fai},
  title           = {A {Chinese} dictionary construction algorithm for
                     information retrieval},
  journal         = j-TALIP,
  volume          = {1},
  number          = {4},
  pages           = {281--296},
  month           = dec,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:01 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 1, number 4 (December 2002), pages 297--309.
@article{Li:2002:CCB,
  author          = {Li, Yuanxiang and Ding, Xiaoqing and Tan, Chew Lim},
  title           = {Combining character-based bigrams with word-based
                     bigrams in contextual postprocessing for {Chinese}
                     script recognition},
  journal         = j-TALIP,
  volume          = {1},
  number          = {4},
  pages           = {297--309},
  month           = dec,
  year            = {2002},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Aug 7 08:49:01 MDT 2003},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 1 (March 2003), pages 1--26.
@article{Lo:2003:CLS,
  author          = {Lo, Wai-Kit and Meng, Helen and Ching, P. C.},
  title           = {Cross-language spoken document retrieval using
                     {HMM}-based retrieval model with multi-scale fusion},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {1--26},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 1 (March 2003), pages 27--48.
@article{Shi:2003:OHC,
  author          = {Shi, Daming and Damper, Robert I. and Gunn, Steve R.},
  title           = {Offline handwritten {Chinese} character recognition
                     by radical decomposition},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {27--48},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 1 (March 2003), pages 49--62.
@article{Lee:2003:TAS,
  author          = {Lee, Yue-Shi},
  title           = {Task adaptation in stochastic language model for
                     {Chinese} homophone disambiguation},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {49--62},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 1 (March 2003), pages 63--77.
@article{Shieh:2003:EAT,
  author          = {Shieh, Jiann-Cherng},
  title           = {An efficient accessing technique for {Taiwanese}
                     phonetic transcriptions},
  journal         = j-TALIP,
  volume          = {2},
  number          = {1},
  pages           = {63--77},
  month           = mar,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Sun Jan 11 10:17:38 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 79--84.
@article{Oard:2003:SLE,
  author          = {Oard, Douglas W.},
  title           = {The surprise language exercises},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {79--84},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 85--100.
@article{Allan:2003:MTD,
  author          = {Allan, James and Lavrenko, Victor and
                     Connell, Margaret E.},
  title           = {A month to topic detection and tracking in {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {85--100},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 101--117.
@article{Strassel:2003:LRC,
  author          = {Strassel, Stephanie and Maxwell, Mike and
                     Cieri, Christopher},
  title           = {Linguistic resource creation for research and
                     technology development: {A} recent experiment},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {101--117},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 118--123.
@article{Dorr:2003:RPD,
  author          = {Dorr, Bonnie J. and Ayan, Necip Fazil and
                     Habash, Nizar and Madnani, Nitin and Hwa, Rebecca},
  title           = {Rapid porting of {DUSTer} to {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {118--123},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 124--129.
@article{Huang:2003:ENE,
  author          = {Huang, Fei and Vogel, Stephan and Waibel, Alex},
  title           = {Extracting named entity translingual equivalence with
                     limited resources},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {124--129},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 130--142.
@article{Larkey:2003:HCT,
  author          = {Larkey, Leah S. and Connell, Margaret E. and
                     Abduljaleel, Nasreen},
  title           = {{Hindi CLIR} in thirty days},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {130--142},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 143--163.
%%% The compound surname `Font Llitj{\'o}s' is entered in `Last, First'
%%% form; written as `First Last', BibTeX would take only the final word
%%% as the surname and treat `Font' as a given name.
@Article{Lavie:2003:EHE,
  author =       "Alon Lavie and Stephan Vogel and Lori Levin and Erik
                 Peterson and Katharina Probst and Font Llitj{\'o}s,
                 Ariadna and Rachel Reynolds and Jaime Carbonell and
                 Richard Cohen",
  title =        "Experiments with a {Hindi-to-English} transfer-based
                 {MT} system under a miserly data scenario",
  journal =      j-TALIP,
  volume =       "2",
  number =       "2",
  pages =        "143--163",
  month =        jun,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Nov 4 08:37:35 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 2 (June 2003), pages 164--168.
@article{Xu:2003:CLR,
  author          = {Xu, Jinxi and Weischedel, Ralph},
  title           = {Cross-lingual retrieval for {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {2},
  pages           = {164--168},
  month           = jun,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:35 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 169--180.
%%% Only the proper nouns and the sentence-initial `What' are
%%% brace-protected in the title; the common noun `name' and the closing
%%% punctuation are left to the bibliography style.
@Article{May:2003:SWC,
  author =       "Jonathan May and Ada Brunstein and Prem Natarajan and
                 Ralph Weischedel",
  title =        "Surprise! {What}'s in a {Cebuano} or {Hindi} name?",
  journal =      j-TALIP,
  volume =       "2",
  number =       "3",
  pages =        "169--180",
  month =        sep,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Nov 4 08:37:36 MST 2004",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 181--192.
@article{Sekine:2003:HEC,
  author          = {Sekine, Satoshi and Grishman, Ralph},
  title           = {{Hindi-English} cross-lingual question-answering
                     system},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {181--192},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 193--218.
@article{Ma:2003:AHO,
  author          = {Ma, Huanfeng and Doermann, David},
  title           = {Adaptive {Hindi OCR} using generalized {Hausdorff}
                     image comparison},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {193--218},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 219--244.
@article{He:2003:MMI,
  author          = {He, Daqing and Oard, Douglas W. and Wang, Jianqiang and
                     Luo, Jun and Demner-Fushman, Dina and
                     Darwish, Kareem and Resnik, Philip and
                     Khudanpur, Sanjeev and Nossal, Michael and
                     Subotin, Michael and Leuski, Anton},
  title           = {Making {MIRACLEs}: {Interactive} translingual search
                     for {Cebuano} and {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {219--244},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 245--269.
@article{Leuski:2003:CLC,
  author          = {Leuski, Anton and Lin, Chin-Yew and Zhou, Liang and
                     Germann, Ulrich and Och, Franz Josef and
                     Hovy, Eduard},
  title           = {Cross-lingual {C*ST*RD}: {English} access to {Hindi}
                     information},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {245--269},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 270--289.
@article{Dorr:2003:CLH,
  author          = {Dorr, Bonnie and Zajic, David and Schwartz, Richard},
  title           = {Cross-language headline generation for {Hindi}},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {270--289},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 290--294.
@article{Li:2003:RDH,
  author          = {Li, Wei and McCallum, Andrew},
  title           = {Rapid development of {Hindi} named entity recognition
                     using conditional random fields and feature
                     induction},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {290--294},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 3 (September 2003), pages 295--300.
@article{Maynard:2003:RCI,
  author          = {Maynard, Diana and Tablan, Valentin and
                     Bontcheva, Kalina and Cunningham, Hamish},
  title           = {Rapid customization of an information extraction
                     system for a surprise language},
  journal         = j-TALIP,
  volume          = {2},
  number          = {3},
  pages           = {295--300},
  month           = sep,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 4 (December 2003), pages 301--323.
@article{Kang:2003:IPP,
  author          = {Kang, Mi-Young and Yoon, Aesun and Kwon, Hyuk-Chul},
  title           = {Improving partial parsing based on error-pattern
                     analysis for a {Korean} grammar-checker},
  journal         = j-TALIP,
  volume          = {2},
  number          = {4},
  pages           = {301--323},
  month           = dec,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 2, number 4 (December 2003), pages 324--337.
@article{Kim:2003:RRE,
  author          = {Kim, Harksoo and Seo, Jungyun},
  title           = {Resolution of referring expressions in a {Korean}
                     multimodal dialogue system},
  journal         = j-TALIP,
  volume          = {2},
  number          = {4},
  pages           = {324--337},
  month           = dec,
  year            = {2003},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 1 (March 2004), pages 1--10.
@article{Mani:2004:ISI,
  author          = {Mani, Inderjeet and Pustejovsky, James and
                     Sundheim, Beth},
  title           = {Introduction to the special issue on temporal
                     information processing},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {1--10},
  month           = mar,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 1 (March 2004), pages 11--32.
@article{Han:2004:FRT,
  author          = {Han, Benjamin and Lavie, Alon},
  title           = {A framework for resolution of time in natural
                     language},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {11--32},
  month           = mar,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 1 (March 2004), pages 33--50.
@article{Schilder:2004:EMT,
  author          = {Schilder, Frank},
  title           = {Extracting meaning from temporal nouns and temporal
                     prepositions},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {33--50},
  month           = mar,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 1 (March 2004), pages 51--65.
@article{Jang:2004:ATT,
  author          = {Jang, Seok Bae and Baldwin, Jennifer and
                     Mani, Inderjeet},
  title           = {Automatic {TIMEX2} tagging of {Korean} news},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {51--65},
  month           = mar,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 1 (March 2004), pages 66--85.
@article{Hobbs:2004:OTS,
  author          = {Hobbs, Jerry R. and Pan, Feng},
  title           = {An ontology of time for the {Semantic Web}},
  journal         = j-TALIP,
  volume          = {3},
  number          = {1},
  pages           = {66--85},
  month           = mar,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Nov 4 08:37:36 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 87--93.
@article{Gao:2004:ISI,
  author          = {Gao, Jianfeng and Lin, Chin-Yew},
  title           = {Introduction to the special issue on statistical
                     language modeling},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {87--93},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 94--112.
@article{Kim:2004:LTL,
  author          = {Kim, Woosung and Khudanpur, Sanjeev},
  title           = {Lexical triggers and latent semantic analysis for
                     cross-lingual language model adaptation},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {94--112},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 113--127.
@article{Linares:2004:HLM,
  author          = {Linares, Diego and Bened{\'\i}, Jos{\'e}-Miguel and
                     S{\'a}nchez, Joan-Andreu},
  title           = {A hybrid language model based on a combination of
                     {$N$}-grams and stochastic context-free grammars},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {113--127},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 128--145.
@article{Chen:2004:DHG,
  author          = {Chen, Berlin and Wang, Hsin-Min and Lee, Lin-Shan},
  title           = {A discriminative {HMM\slash N}-gram-based retrieval
                     approach for {Mandarin} spoken documents},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {128--145},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 146--158.
@article{Nguyen:2004:EBS,
  author          = {Nguyen, Minh Le and Horiguchi, Susumu and
                     Shimazu, Akira and Ho, Bao Tu},
  title           = {Example-based sentence reduction using the hidden
                     {Markov} model},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {146--158},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 2 (June 2004), pages 159--168.
@article{Fung:2004:MEC,
  author          = {Fung, Pascale and Ngai, Grace and Yang, Yongsheng and
                     Chen, Benfeng},
  title           = {A maximum-entropy {Chinese} parser augmented by
                     transformation-based learning},
  journal         = j-TALIP,
  volume          = {3},
  number          = {2},
  pages           = {159--168},
  month           = jun,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Mon Nov 22 06:20:04 MST 2004},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 3 (September 2004), pages 169--180.
@article{Li:2004:AMF,
  author          = {Li, Yujia and Lee, Tan and Qian, Yao},
  title           = {Analysis and modeling of {F0} contours for
                     {Cantonese} text-to-speech},
  journal         = j-TALIP,
  volume          = {3},
  number          = {3},
  pages           = {169--180},
  month           = sep,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Volume 3, number 3 (September 2004), pages 190--212.
@article{Huang:2004:UWB,
  author          = {Huang, Chien-Chung and Chuang, Shui-Lung and
                     Chien, Lee-Feng},
  title           = {Using a {Web}-based categorization approach to
                     generate thematic metadata from texts},
  journal         = j-TALIP,
  volume          = {3},
  number          = {3},
  pages           = {190--212},
  month           = sep,
  year            = {2004},
  coden           = {????},
  issn            = {1530-0226},
  bibdate         = {Thu Apr 14 12:20:22 MDT 2005},
  bibsource       = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@Article{Myaeng:2004:ISI,
  author =       "Sung Hyon Myaeng",
  title =        "Introduction to the special issue on computer
                 processing of oriental languages",
  journal =      j-TALIP,
  volume =       "3",
  number =       "4",
  pages =        "213--213",
  month =        dec,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Apr 14 12:20:22 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Author names are written given-name first so that BibTeX takes the
%%% family names (Li, Lu, Yu) as surnames, as in entry Li:2006:TTT.
%%% The citation label is unchanged so that existing citations resolve.
@Article{Baoli:2004:AKN,
  author =       "Baoli Li and Qin Lu and Shiwen Yu",
  title =        "An adaptive $k$-nearest neighbor text categorization
                 strategy",
  journal =      j-TALIP,
  volume =       "3",
  number =       "4",
  pages =        "215--226",
  month =        dec,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Apr 14 12:20:22 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kim:2004:UTI,
  author =       "Pyung Kim and Sung Hyon Myaeng",
  title =        "Usefulness of temporal information automatically
                 extracted from news articles for topic tracking",
  journal =      j-TALIP,
  volume =       "3",
  number =       "4",
  pages =        "227--242",
  month =        dec,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Apr 14 12:20:22 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2004:ESS,
  author =       "Le Zhang and Jingbo Zhu and Tianshun Yao",
  title =        "An evaluation of statistical spam filtering
                 techniques",
  journal =      j-TALIP,
  volume =       "3",
  number =       "4",
  pages =        "243--269",
  month =        dec,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Apr 14 12:20:22 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Wu:2005:DSF,
  author =       "Chung-Hsien Wu and Jui-Feng Yeh and Ming-Jun Chen",
  title =        "Domain-specific {FAQ} retrieval using independent
                 aspects",
  journal =      j-TALIP,
  volume =       "4",
  number =       "1",
  pages =        "1--17",
  month =        mar,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jul 7 13:48:21 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Murata:2005:CEV,
  author =       "Masaki Murata and Masao Utiyama and Kiyotaka Uchimoto
                 and Hitoshi Isahara and Qing Ma",
  title =        "Correction of errors in a verb modality corpus for
                 machine translation with a machine-learning method",
  journal =      j-TALIP,
  volume =       "4",
  number =       "1",
  pages =        "18--37",
  month =        mar,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jul 7 13:48:21 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Hendessi:2005:SSP,
  author =       "F. Hendessi and A. Ghayoori and T. A. Gulliver",
  title =        "A speech synthesizer for {Persian} text using a neural
                 network with a smooth ergodic {HMM}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "1",
  pages =        "38--52",
  month =        mar,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jul 7 13:48:21 MDT 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}


@Article{Zhang:2005:COT,
  author =       "Ying Zhang and Phil Vines and Justin Zobel",
  title =        "{Chinese} {OOV} translation and post-translation query
                 expansion in {Chinese--English} cross-lingual
                 information retrieval",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "57--77",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Qu:2005:TES,
  author =       "Yan Qu and David A. Hull and Gregory Grefenstette and
                 David A. Evans and Motoko Ishikawa and Setsuko Nara and
                 Toshiya Ueda and Daisuke Noda and Kousaku Arita and
                 Yuki Funakoshi and Hiroshi Matsuda",
  title =        "Towards effective strategies for monolingual and
                 bilingual information retrieval: {Lessons} learned from
                 {NTCIR-4}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "78--110",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Sakai:2005:FPR,
  author =       "Tetsuya Sakai and Toshihiko Manabe and Makoto Koyama",
  title =        "Flexible pseudo-relevance feedback via selective
                 sampling",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "111--135",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the start page 135 equals the end page of
%%% Sakai:2005:FPR (111--135) in this same issue; one of the two is
%%% presumably off by one --- confirm against the publisher's table of
%%% contents.
@Article{Kwok:2005:RRP,
  author =       "Kui Lam Kwok and Sora Choi and Norbert Dinstl",
  title =        "Rich results from poor resources: {NTCIR-4}
                 monolingual and cross-lingual retrieval of {Korean}
                 texts using {Chinese} and {English}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "135--158",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Savoy:2005:CSM,
  author =       "Jacques Savoy",
  title =        "Comparative study of monolingual and multilingual
                 search models for use with {Asian} languages",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "159--185",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mase:2005:PTS,
  author =       "Hisao Mase and Tadataka Matsubayashi and Yuichi Ogawa
                 and Makoto Iwayama and Tadaaki Oshio",
  title =        "Proposal of two-stage patent retrieval method
                 considering the claim structure",
  journal =      j-TALIP,
  volume =       "4",
  number =       "2",
  pages =        "186--202",
  month =        jun,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Dec 17 08:07:33 MST 2005",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Nakagawa:2005:PSI,
  author =       "Hiroshi Nakagawa and Tatsunori Mori and Noriko Kando",
  title =        "Preface to the special issues on {NTCIR-4}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "237--242",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kato:2005:ODQ,
  author =       "Tsuneaki Kato and Jun'ichi Fukumoto and Fumito Masui
                 and Noriko Kando",
  title =        "Are open-domain question answering technologies useful
                 for information access dialogues?---an empirical study
                 and a proposal of a novel challenge",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "243--262",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Isozaki:2005:AHP,
  author =       "Hideki Isozaki",
  title =        "An analysis of a high-performance {Japanese} question
                 answering system",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "263--279",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mori:2005:JQA,
  author =       "Tatsunori Mori",
  title =        "{Japanese} question-answering system using {A*} search
                 and its improvement",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "280--304",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mori:2005:MAF,
  author =       "Tatsunori Mori and Masanori Nozawa and Yoshiaki
                 Asada",
  title =        "Multi-answer-focused multi-document summarization
                 using a question-answering engine",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "305--320",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Okazaki:2005:ICO,
  author =       "Naoaki Okazaki and Yutaka Matsuo and Mitsuru
                 Ishizuka",
  title =        "Improving chronological ordering of sentences
                 extracted from multiple newspaper articles",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "321--339",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Yoshioka:2005:CPB,
  author =       "Masaharu Yoshioka and Makoto Haraguchi",
  title =        "On a combination of probabilistic and {Boolean} {IR}
                 models for {WWW} document retrieval",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "340--356",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Author names are written given-name first so that BibTeX takes the
%%% family names (Yang, Ji, Tang, Niu) as surnames.  The citation label
%%% is unchanged so that existing citations resolve.
@Article{Lingpeng:2005:CIR,
  author =       "Lingpeng Yang and Donghong Ji and Li Tang and Zhengyu
                 Niu",
  title =        "{Chinese} information retrieval based on terms and
                 relevant terms",
  journal =      j-TALIP,
  volume =       "4",
  number =       "3",
  pages =        "357--374",
  month =        sep,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Jan 26 08:28:41 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Sakai:2006:ISI,
  author =       "Tetsuya Sakai and Yuji Matsumoto",
  title =        "Introduction to the special issue: {Recent} advances
                 in information processing and access for {Japanese}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "375--376",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Doi:2006:EBM,
  author =       "Takao Doi and Hirofumi Yamamoto and Eiichiro Sumita",
  title =        "Example-based machine translation using efficient
                 sentence retrieval based on edit-distance",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "377--399",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Tomiura:2006:ESS,
  author =       "Yoichi Tomiura and Shosaku Tanaka and Toru Hitaka",
  title =        "Estimating satisfactoriness of selectional restriction
                 from corpus without a thesaurus",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "400--416",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Iida:2006:ARA,
  author =       "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
  title =        "Anaphora resolution by antecedent identification
                 followed by anaphoricity determination",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "417--434",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Inui:2006:ACK,
  author =       "Takashi Inui and Kentaro Inui and Yuji Matsumoto",
  title =        "Acquiring causal knowledge from text using the
                 connective marker {\em tame\/}",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "435--474",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% Volume 4, number 4 is the December 2005 issue (the bibdate below is
%%% February 2006, so a December 2006 cover date is impossible).  The
%%% citation label keeps its 2006 tag so that existing citations resolve.
@Article{Ma:2006:TSB,
  author =       "Qiang Ma and Katsumi Tanaka",
  title =        "Topic-structure-based complementary information
                 retrieval and its application",
  journal =      j-TALIP,
  volume =       "4",
  number =       "4",
  pages =        "475--503",
  month =        dec,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu Feb 16 10:54:02 MST 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the leading ``AUTHOR:'' in the title looks like a
%%% template placeholder captured from the publisher's Web page rather
%%% than part of the published title --- confirm against the issue
%%% itself before changing it.  The citation label is derived from it.
@Article{Park:2006:ATM,
  author =       "Jong C. Park and Gary Geunbae Lee and Limsoon Wong",
  title =        "{AUTHOR}: {Text} mining and management in
                 biomedicine",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "1--3",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Park:2006:MBB,
  author =       "Kyung-Mi Park and Seon-Ho Kim and Hae-Chang Rim and
                 Young-Sook Hwang",
  title =        "{ME}-based biomedical named entity recognition using
                 lexical knowledge",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "4--21",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Nenadic:2006:MSR,
  author =       "Goran Nenadi{\'c} and Sophia Ananiadou",
  title =        "Mining semantically related terms from biomedical
                 literature",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "22--43",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kim:2006:ECI,
  author =       "Jung-Jae Kim and Jong C. Park",
  title =        "Extracting contrastive information from negation
                 patterns in biomedical literature",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "44--60",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Kim:2006:TPL,
  author =       "Eunju Kim and Yu Song and Cheongjae Lee and Kyoungduk
                 Kim and Gary Geunbae Lee and Byoung-Kee Yi and Jeongwon
                 Cha",
  title =        "Two-phase learning for biological event extraction and
                 verification",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "61--73",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Mima:2006:TBK,
  author =       "Hideki Mima and Sophia Ananiadou and Katsumori
                 Matsushima",
  title =        "Terminology-based knowledge mining for new knowledge
                 discovery",
  journal =      j-TALIP,
  volume =       "5",
  number =       "1",
  pages =        "74--88",
  month =        mar,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Thu May 11 11:29:25 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Carpuat:2006:AWS,
  author =       "Marine Carpuat and Pascale Fung and Grace Ngai",
  title =        "Aligning word senses using bilingual corpora",
  journal =      j-TALIP,
  volume =       "5",
  number =       "2",
  pages =        "89--120",
  month =        jun,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165255.1165256",
  ISSN =         "1530-0226",
  bibdate =      "Thu Oct 5 07:00:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The growing importance of multilingual information
                 retrieval and machine translation has made multilingual
                 ontologies extremely valuable resources. Since the
                 construction of an ontology from scratch is a very
                 expensive and time-consuming undertaking, it is
                 attractive to consider ways of automatically aligning
                 monolingual ontologies, which already exist for many of
                 the world's major languages. Previous research
                 exploited similarity in the structure of the ontologies
                 to align, or manually created bilingual resources.
                 These approaches cannot be used to align ontologies
                 with vastly different structures and can only be
                 applied to much studied language pairs for which
                 expensive resources are already available. In this
                 paper, we propose a novel approach to align the
                 ontologies at the node level: Given a concept
                 represented by a particular word sense in one ontology,
                 our task is to find the best corresponding word sense
                 in the second language ontology. To this end, we
                 present a language-independent, corpus-based method
                 that borrows from techniques used in information
                 retrieval and machine translation. We show its
                 efficiency by applying it to two very different
                 ontologies in very different languages: the Mandarin
                 Chinese HowNet and the American English WordNet.
                 Moreover, we propose a methodology to measure bilingual
                 corpora comparability and show that our method is
                 robust enough to use noisy nonparallel bilingual
                 corpora efficiently, when clean parallel corpora are
                 not available.",
  acknowledgement = ack-nhfb,
}

@Article{Lee:2006:ABN,
  author =       "Chun-Jen Lee and Jason S. Chang and Jyh-Shing R.
                 Jang",
  title =        "Alignment of bilingual named entities in parallel
                 corpora using statistical models and multiple knowledge
                 sources",
  journal =      j-TALIP,
  volume =       "5",
  number =       "2",
  pages =        "121--145",
  month =        jun,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165255.1165257",
  ISSN =         "1530-0226",
  bibdate =      "Thu Oct 5 07:00:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Named entity (NE) extraction is one of the fundamental
                 tasks in natural language processing (NLP). Although
                 many studies have focused on identifying NEs within
                 monolingual documents, aligning NEs in bilingual
                 documents has not been investigated extensively due to
                 the complexity of the task. In this article we
                 introduce a new approach to aligning bilingual NEs in
                 parallel corpora by incorporating statistical models
                 with multiple knowledge sources. In our approach, we
                 model the process of translating an English NE phrase
                 into a Chinese equivalent using lexical
                 translation\slash transliteration probabilities for
                 word translation and alignment probabilities for word
                 reordering. The method involves automatically learning
                 phrase alignment and acquiring word translations from a
                 bilingual phrase dictionary and parallel corpora, and
                 automatically discovering transliteration
                 transformations from a training set of
                 name-transliteration pairs. The method also involves
                 language-specific knowledge functions, including
                 handling abbreviations, recognizing Chinese personal
                 names, and expanding acronyms. At runtime, the proposed
                 models are applied to each source NE in a pair of
                 bilingual sentences to generate and evaluate the target
                 NE candidates; the source and target NEs are then
                 aligned based on the computed
                 probabilities. Experimental results demonstrate that
                 the proposed approach, which integrates statistical
                 models with extra knowledge sources, is highly feasible
                 and offers significant improvement in performance
                 compared to our previous work, as well as the
                 traditional approach of IBM Model 4.",
  acknowledgement = ack-nhfb,
}

@Article{Shirado:2006:UJH,
  author =       "Tamotsu Shirado and Satoko Marumoto and Masaki Murata
                 and Hitoshi Isahara",
  title =        "Using {Japanese} honorific expressions: {A}
                 psychological study",
  journal =      j-TALIP,
  volume =       "5",
  number =       "2",
  pages =        "146--164",
  month =        jun,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165255.1165258",
  ISSN =         "1530-0226",
  bibdate =      "Thu Oct 5 07:00:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We investigated, via experiment, knowledge of
                 normative honorific expressions as used in textbooks
                 and in practice by people. Forty subjects divided into
                 four groups according to age (younger\slash older) and
                 gender (male\slash female) participated in the
                 experiments. The results show that knowledge about the
                 use of normative honorific expressions in textbooks is
                 similar to that demonstrated by the younger subject
                 groups, but differed from that of the older subject
                 groups. The knowledge of the older subjects was more
                 complex than that shown in textbooks or demonstrated by
                 the younger subjects. A model that can identify misuse
                 of honorific expressions in sentences is the framework
                 for this investigation. The model is minimal, but could
                 represent 76\% to 92\% of the subjects' knowledge
                 regarding each honorific element. This model will be
                 useful in the development of computer-aided systems to
                 help teach how honorific expressions should be used.",
  acknowledgement = ack-nhfb,
}

@Article{Wu:2006:ERT,
  author =       "Chung-Hsien Wu and Ze-Jing Chuang and Yu-Chung Lin",
  title =        "Emotion recognition from text using semantic labels
                 and separable mixture models",
  journal =      j-TALIP,
  volume =       "5",
  number =       "2",
  pages =        "165--183",
  month =        jun,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165255.1165259",
  ISSN =         "1530-0226",
  bibdate =      "Thu Oct 5 07:00:29 MDT 2006",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This study presents a novel approach to automatic
                 emotion recognition from text. First, emotion
                 generation rules (EGRs) are manually deduced from
                 psychology to represent the conditions for generating
                 emotion. Based on the EGRs, the emotional state of each
                 sentence can be represented as a sequence of semantic
                 labels (SLs) and attributes (ATTs); SLs are defined as
                 the domain-independent features, while ATTs are
                 domain-dependent. The emotion association rules (EARs)
                 represented by SLs and ATTs for each emotion are
                 automatically derived from the sentences in an
                 emotional text corpus using the a priori algorithm.
                 Finally, a separable mixture model (SMM) is adopted to
                 estimate the similarity between an input sentence and
                 the EARs of each emotional state. Since some features
                 defined in this approach are domain-dependent, a dialog
                 system focusing on the students' daily expressions is
                 constructed, and only three emotional states, happy,
                 unhappy, and neutral, are considered for performance
                 evaluation. According to the results of the
                 experiments, given the domain corpus, the proposed
                 approach is promising, and easily ported into other
                 domains.",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the start page 183 equals the end page of the
%%% preceding article Wu:2006:ERT (165--183); one of the two is
%%% presumably off by one --- confirm against the publisher's table of
%%% contents.
@Article{Dale:2006:ISS,
  author =       "Robert Dale",
  title =        "Introduction to the special section: {Extended} best
                 papers from {IJCNLP 2005}",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "183--184",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Oh:2006:MTM,
  author =       "Jong-Hoon Oh and Key-Sun Choi and Hitoshi Isahara",
  title =        "A machine transliteration model based on
                 correspondence between graphemes and phonemes",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "185--208",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gao:2006:ESL,
  author =       "Jianfeng Gao and Hisami Suzuki and Wei Yuan",
  title =        "An empirical study on language model adaptation",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "209--227",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Ye:2006:SRL,
  author =       "Patrick Ye and Timothy Baldwin",
  title =        "Semantic role labeling of prepositional phrases",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "228--244",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Chung:2006:APD,
  author =       "Tze Leung Chung and Robert Wing Pong Luk and Kam Fai
                 Wong and Kui Lam Kwok and Dik Lun Lee",
  title =        "Adapting pivoted document-length normalization for
                 query size: {Experiments} in {Chinese} and {English}",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "245--263",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Matsumura:2006:ERB,
  author =       "Atsushi Matsumura and Atsuhiro Takasu and Jun Adachi",
  title =        "Effect of relationships between words on {Japanese}
                 information retrieval",
  journal =      j-TALIP,
  volume =       "5",
  number =       "3",
  pages =        "264--289",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:36 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Song:2006:ISI,
  author =       "Dawei Song and Jian-Yun Nie",
  title =        "Introduction to special issue on reasoning in natural
                 language information processing",
  journal =      j-TALIP,
  volume =       "5",
  number =       "4",
  pages =        "291--295",
  month =        dec,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Nie:2006:ILM,
  author =       "Jian-Yun Nie and Guihong Cao and Jing Bai",
  title =        "Inferential language models for information
                 retrieval",
  journal =      j-TALIP,
  volume =       "5",
  number =       "4",
  pages =        "296--322",
  month =        dec,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}

@Article{Gao:2006:SQT,
  author =       {Jianfeng Gao and Jian-Yun Nie and Ming Zhou},
  title =        {Statistical query translation models for
                 cross-language information retrieval},
  journal =      j-TALIP,
  volume =       {5},
  number =       {4},
  pages =        {323--359},
  month =        dec,
  year =         {2006},
  CODEN =        {????},
  ISSN =         {1530-0226},
  bibdate =      {Sat Apr 14 10:21:37 MDT 2007},
  bibsource =    {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@Article{Liu:2006:SFQ,
  author =       {Yi Liu and Rong Jin and Joyce Y. Chai},
  title =        {A statistical framework for query translation
                 disambiguation},
  journal =      j-TALIP,
  volume =       {5},
  number =       {4},
  pages =        {360--387},
  month =        dec,
  year =         {2006},
  CODEN =        {????},
  ISSN =         {1530-0226},
  bibdate =      {Sat Apr 14 10:21:37 MDT 2007},
  bibsource =    {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@Article{Li:2006:TTT,
  author =       {Baoli Li and Wenjie Li and Qin Lu},
  title =        {Topic tracking with time granularity reasoning},
  journal =      j-TALIP,
  volume =       {5},
  number =       {4},
  pages =        {388--412},
  month =        dec,
  year =         {2006},
  CODEN =        {????},
  ISSN =         {1530-0226},
  bibdate =      {Sat Apr 14 10:21:37 MDT 2007},
  bibsource =    {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@Article{Phan:2006:IDS,
  author =       {Xuan-Hieu Phan and Le-Minh Nguyen and Yasushi Inoguchi
                 and Tu-Bao Ho and Susumu Horiguchi},
  title =        {Improving discriminative sequential learning by
                 discovering important association of statistics},
  journal =      j-TALIP,
  volume =       {5},
  number =       {4},
  pages =        {413--438},
  month =        dec,
  year =         {2006},
  CODEN =        {????},
  ISSN =         {1530-0226},
  bibdate =      {Sat Apr 14 10:21:37 MDT 2007},
  bibsource =    {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Pages follow the articleno:page form used for later TALIP issues in
%%% this file; the final page number is still unknown.
@Article{Chen:2007:UDM,
  author =       "Yong Chen and Kwok-Ping Chan",
  title =        "Using data mining techniques and rough set theory for
                 language modeling",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "2",
}

%%% Pages follow the articleno:page form used for later TALIP issues in
%%% this file; the final page number is still unknown.
@Article{Hsu:2007:MSB,
  author =       "Chung-Chian Hsu and Chien-Hsing Chen and Tien-Teng
                 Shih and Chun-Kai Chen",
  title =        "Measuring similarity between transliterations against
                 noise data",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "5",
}

%%% Pages follow the articleno:page form used for later TALIP issues in
%%% this file; the final page number is still unknown.
@Article{Sakai:2007:RFQ,
  author =       "Tetsuya Sakai",
  title =        "On the reliability of factoid question answering
                 evaluation",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "3",
}

%%% Pages follow the articleno:page form used for later TALIP issues in
%%% this file; the final page number is still unknown.
@Article{Wiseman:2007:CBC,
  author =       "Yair Wiseman and Irit Gefner",
  title =        "Conjugation-based compression for {Hebrew} texts",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "4",
}

%%% Pages follow the articleno:page form used for later TALIP issues in
%%% this file; the final page number is still unknown.
@Article{Wu:2007:TBS,
  author =       "Chung-Hsien Wu and Hung-Yu Su and Yu-Hsien Chiu and
                 Chia-Hung Lin",
  title =        "Transfer-based statistical translation of {Taiwanese}
                 sign language using {PCFG}",
  journal =      j-TALIP,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        apr,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1530-0226",
  bibdate =      "Sat Apr 14 10:21:37 MDT 2007",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "1",
}

@Article{Kuo:2007:PSM,
  author =       "Jin-Shea Kuo and Haizhou Li and Ying-Kuei Yang",
  title =        "A phonetic similarity model for automatic extraction
                 of transliteration pairs",
  journal =      j-TALIP,
  volume =       "6",
  number =       "2",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1282080.1282081",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:28 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This article proposes an approach for the automatic
                 extraction of transliteration pairs from Chinese Web
                 corpora. In this approach, we formulate the machine
                 transliteration process using a syllable-based phonetic
                 similarity model which consists of phonetic confusion
                 matrices and a Chinese character $n$-gram language
                 model. With the phonetic similarity model, the
                 extraction of transliteration pairs becomes a two-step
                 process of recognition followed by validation: First,
                 in the recognition process, we identify the most
                 probable transliteration in the $k$-neighborhood of a
                 recognized English word. Then, in the validation
                 process, we qualify the transliteration pair candidates
                 with a hypothesis test. We carry out an analytical
                 study on the statistics of several key factors in
                 English-Chinese transliteration to help formulate
                 phonetic similarity modeling. We then conduct both
                 supervised and unsupervised learning of a phonetic
                 similarity model on a development database. The
                 experimental results validate the effectiveness of the
                 phonetic similarity model by achieving an $F$-measure of
                 0.739 in supervised learning. The unsupervised learning
                 approach works almost as well as the supervised one,
                 thus allowing us to deploy automatic extraction of
                 transliteration pairs in the Web space.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  keywords =     "extraction of transliteration pairs; machine
                 translation; machine transliteration; phonetic
                 confusion probability; phonetic similarity modeling",
}

@Article{Xiao:2007:SNM,
  author =       {Jinghui Xiao and Xiaolong Wang and Bingquan Liu},
  title =        {The study of a nonstationary maximum entropy {Markov}
                 model and its application on the pos-tagging task},
  journal =      j-TALIP,
  volume =       {6},
  number =       {2},
  pages =        {7:1--7:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1282080.1282082},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:11:28 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {Sequence labeling is a core task in natural language
                 processing. The maximum entropy Markov model (MEMM) is
                 a powerful tool in performing this task. This article
                 enhances the traditional MEMM by exploiting the
                 positional information of language elements. The
                 stationary hypothesis is relaxed in MEMM, and the
                 nonstationary MEMM (NS-MEMM) is proposed. Several
                 related issues are discussed in detail, including the
                 representation of positional information, NS-MEMM
                 implementation, smoothing techniques, and the space
                 complexity issue. Furthermore, the asymmetric NS-MEMM
                 presents a more flexible way to exploit positional
                 information. In the experiments, NS-MEMM is evaluated
                 on both the Chinese and the English pos-tagging tasks.
                 According to the experimental results, NS-MEMM yields
                 effective improvements over MEMM by exploiting
                 positional information. The smoothing techniques in
                 this article effectively solve the NS-MEMM
                 data-sparseness problem; the asymmetric NS-MEMM is also
                 an improvement by exploiting positional information in
                 a more flexible way.},
  acknowledgement = ack-nhfb,
  articleno =    {7},
  keywords =     {data sparseness problem; Markov property; MEMM;
                 pos-tagging; stationary hypothesis},
}

@Article{Zhuang:2007:IHD,
  author =       "Yi Zhuang and Yueting Zhuang and Qing Li and Lei
                 Chen",
  title =        "Interactive high-dimensional index for large {Chinese}
                 calligraphic character databases",
  journal =      j-TALIP,
  volume =       "6",
  number =       "2",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1282080.1282083",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:28 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The large numbers of Chinese calligraphic scripts in
                 existence are valuable part of the Chinese cultural
                 heritage. However, due to the shape complexity of these
                 characters, it is hard to employ existing techniques to
                 effectively retrieve and efficiently index them. In
                 this article, using a novel shape-similarity-based
                 retrieval method in which shapes of calligraphic
                 characters are represented by their contour points
                 extracted from the character images, we propose an
                 interactive partial-distance-map (PDM)-based
                 high-dimensional indexing scheme which is designed
                 specifically to speed up the retrieval performance of
                 the large Chinese calligraphic character databases
                 effectively. Specifically, we use the approximate
                 minimal bounding sphere of a query character and
                 utilize users' relevance feedback to refine the query
                 gradually. Comprehensive experiments are conducted to
                 testify the efficiency and effectiveness of this
                 method. In addition, a new $k$-NN search called Pseudo
                 $k$-NN (P $k$-NN) search is presented to better
                 facilitate the PDM-based character retrieval.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  keywords =     "Chinese calligraphic character; hyper-centre
                 relocation; Pseudo k-NN",
}

@Article{Saraswathi:2007:CPE,
  author =       {S. Saraswathi and T. V. Geetha},
  title =        {Comparison of performance of enhanced morpheme-based
                 language model with different word-based language
                 models for improving the performance of {Tamil} speech
                 recognition system},
  journal =      j-TALIP,
  volume =       {6},
  number =       {3},
  pages =        {9:1--9:??},
  month =        nov,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1290002.1290003},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:11:45 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {This paper describes a new technique of language
                 modeling for a highly inflectional Dravidian language,
                 Tamil. It aims to alleviate the main problems
                 encountered in processing of Tamil language, like
                 enormous vocabulary growth caused by the large number
                 of different forms derived from one word. The size of
                 the vocabulary was reduced by, decomposing the words
                 into stems and endings and storing these sub word units
                 (morphemes) in the vocabulary separately. A enhanced
                 morpheme-based language model was designed for the
                 inflectional language Tamil. The enhanced
                 morpheme-based language model was trained on the
                 decomposed corpus. The perplexity and Word Error Rate
                 (WER) were obtained to check the efficiency of the
                 model for Tamil speech recognition system. The results
                 were compared with word-based bigram and trigram
                 language models, distance based language model,
                 dependency based language model and class based
                 language model. From the results it was analyzed that
                 the enhanced morpheme-based trigram model with Katz
                 back-off smoothing effect improved the performance of
                 the Tamil speech recognition system when compared to
                 the word-based language models.},
  acknowledgement = ack-nhfb,
  articleno =    {9},
  keywords =     {language model; morphemes; perplexity; word error rate
                 and speech recognition},
}

@Article{Hussain:2007:DLS,
  author =       {Sarmad Hussain and Sana Gul and Afifah Waseem},
  title =        {Developing lexicographic sorting: {An} example for
                 {Urdu}},
  journal =      j-TALIP,
  volume =       {6},
  number =       {3},
  pages =        {10:1--10:??},
  month =        nov,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1290002.1290004},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:11:45 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {Collation or lexicographic sorting is essential to
                 develop multilingual computing. This paper presents the
                 challenges faced in developing collation sequence for a
                 language. The paper discusses both theoretical
                 linguistic and practical standardization and encoding
                 related considerations that need to be addressed for
                 languages for which relevant standards and/or solutions
                 have not been defined. The paper also defines the
                 process, by giving the details of the procedure
                 followed for Urdu language, which is the national
                 language of Pakistan and is spoken by more than 100
                 million people across the world. The paper is oriented
                 towards organizations involved in developing and using
                 collation standards and the localization industry, and
                 not focused on theoretical issues.},
  acknowledgement = ack-nhfb,
  articleno =    {10},
  keywords =     {text processing; Urdu},
}

@Article{Fukumoto:2007:TTB,
  author =       "Fumiyo Fukumoto and Yoshimi Suzuki",
  title =        "Topic tracking based on bilingual comparable corpora
                 and semisupervised clustering",
  journal =      j-TALIP,
  volume =       "6",
  number =       "3",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1290002.1290005",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:45 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "In this paper, we address the problem of skewed data
                 in topic tracking: the small number of stories labeled
                 positive as compared to negative stories and propose a
                 method for estimating effective training stories for
                 the topic-tracking task. For a small number of labeled
                 positive stories, we use bilingual comparable, i.e.,
                 English, and Japanese corpora, together with the EDR
                 bilingual dictionary, and extract story pairs
                 consisting of positive and associated stories. To
                 overcome the problem of a large number of labeled
                 negative stories, we classified them into clusters.
                 This is done using a semisupervised clustering
                 algorithm, combining $k$-means with EM. The method was
                 tested on the TDT English corpus and the results showed
                 that the system works well when the topic under
                 tracking is talking about an event originating in the
                 source language country, even for a small number of
                 initial positive training stories.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  keywords =     "bilingual comparable corpora; clustering; EM
                 algorithm; N-gram model; topic detection and tracking",
}

@Article{Iida:2007:ZAR,
  author =       "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
  title =        "Zero-anaphora resolution by learning rich syntactic
                 pattern features",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "1:1--1:22",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1316457.1316458",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We approach the zero-anaphora resolution problem by
                 decomposing it into intrasentential and intersentential
                 zero-anaphora resolution tasks. For the former task,
                 syntactic patterns of zero-pronouns and their
                 antecedents are useful clues. Taking Japanese as a
                 target language, we empirically demonstrate that
                 incorporating rich syntactic pattern features in a
                 state-of-the-art learning-based anaphora resolution
                 model dramatically improves the accuracy of
                 intrasentential zero-anaphora, which consequently
                 improves the overall performance of zero-anaphora
                 resolution.",
  acknowledgement = ack-nhfb,
}

%%% NB: the DOI suffix is ...1316459; ...1316458 belongs to the
%%% preceding article in this issue (Iida:2007:ZAR).
@Article{Adriani:2007:SIC,
  author =       "Mirna Adriani and Jelita Asian and Bobby Nazief and S.
                 M. M. Tahaghoghi and Hugh E. Williams",
  title =        "Stemming {Indonesian}: {A} confix-stripping approach",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "2:1--2:33",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1316457.1316459",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Stemming words to (usually) remove suffixes has
                 applications in text search, machine translation,
                 document summarization, and text classification. For
                 example, English stemming reduces the words 'computer,'
                 'computing,' 'computation,' and 'computability' to
                 their common morphological root, 'comput-.' In text
                 search, this permits a search for 'computers' to find
                 documents containing all words with the stem 'comput-.'
                 In the Indonesian language, stemming is of crucial
                 importance: words have prefixes, suffixes, infixes, and
                 confixes that make matching related words
                 difficult.\par

                 This work surveys existing techniques for stemming
                 Indonesian words to their morphological roots, presents
                 our novel and highly accurate CS algorithm, and
                 explores the effectiveness of stemming in the context
                 of general-purpose text information retrieval through
                 ad hoc queries.",
  acknowledgement = ack-nhfb,
  keywords =     "Indonesian; information retrieval; stemming",
}

@Article{Thao:2007:NER,
  author =       "Pham Thi Xuan Thao and Tran Quoc Tri and Dinh Dien and
                 Nigel Collier",
  title =        "Named entity recognition in {Vietnamese} using
                 classifier voting",
  journal =      j-TALIP,
  volume =       "6",
  number =       "4",
  pages =        "3:1--3:18",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1316457.1316460",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:11:55 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Named entity recognition (NER) is one of the
                 fundamental tasks in natural-language processing (NLP).
                 Though the combination of different classifiers has
                 been widely applied in several well-studied languages,
                 this is the first time this method has been applied to
                 Vietnamese. In this article, we describe how voting
                 techniques can improve the performance of Vietnamese
                 NER. By combining several state-of-the-art
                 machine-learning algorithms using voting strategies,
                 our final result outperforms individual algorithms and
                 gained an $F$-measure of 89.12. A detailed discussion
                 about the challenges of NER in Vietnamese is also
                 presented.",
  acknowledgement = ack-nhfb,
  keywords =     "C4.5; Conditional Random Fields; Na{\"\i}ve Bayes;
                 named entity recognition; support vector machines;
                 transformation based learning; Vietnamese; voting",
}

@Article{Chen:2008:SBM,
  author =       {Yufeng Chen and Chengqing Zong},
  title =        {A Structure-Based Model for {Chinese} Organization
                 Name Translation},
  journal =      j-TALIP,
  volume =       {7},
  number =       {1},
  pages =        {1:1--1:??},
  month =        feb,
  year =         {2008},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1330291.1330292},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:12:10 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {Named entity (NE) translation is a fundamental task in
                 multilingual natural language processing. The
                 performance of a machine translation system depends
                 heavily on precise translation of the inclusive NEs.
                 Furthermore, organization name (ON) is the most complex
                 NE for translation among all the NEs. In this article,
                 the structure formulation of ONs is investigated and a
                 hierarchical structure-based ON translation model for
                 Chinese-to-English translation system is
                 presented.\par

                 First, the model performs ON chunking; then both the
                 translation of words within chunks and the process of
                 chunk-reordering are achieved by synchronous
                 context-free grammar (CFG). The CFG rules are extracted
                 from bilingual ON pairs in a training program.\par

                 The main contributions of this article are: (1)
                 defining appropriate chunk-units for analyzing the
                 internal structure of Chinese ONs; (2) making the
                 chunk-based ON translation feasible and flexible via a
                 hierarchical CFG derivation; and (3) proposing a
                 training architecture to automatically learn the
                 synchronous CFG for constructing ONs with chunk-units
                 from aligned bilingual ON pairs. The experiments show
                 that the proposed approach translates the Chinese ONs
                 into English with an accuracy of 93.75\% and
                 significantly improves the performance of a baseline
                 statistical machine translation (SMT) system.},
  acknowledgement = ack-nhfb,
  articleno =    {1},
  keywords =     {alignment; chunk; hierarchical derivation; machine
                 translation; named entity; organization name; rules
                 extraction; structural analysis; synchronous
                 context-free grammar},
}

@Article{Jeong:2008:ISR,
  author =       "Minwoo Jeong and Gary Geunbae Lee",
  title =        "Improving Speech Recognition and Understanding
                 using Error-Corrective Reranking",
  journal =      j-TALIP,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1330291.1330293",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:12:10 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The main issues of practical spoken-language
                 applications for human-computer interface are how to
                 overcome speech recognition errors and guarantee the
                 reasonable end-performance of spoken-language
                 applications. Therefore, handling the erroneously
                 recognized outputs is a key in developing robust
                 spoken-language systems. To address this problem, we
                 present a method to improve the accuracy of speech
                 recognition and performance of spoken-language
                 applications. The proposed error corrective reranking
                 approach exploits recognition environment
                 characteristics and domain-specific semantic
                 information to provide robustness and adaptability for
                 a spoken-language system. We demonstrate some
                 experiments of spoken dialogue tasks and empirical
                 results that show an improvement in accuracy for both
                 speech recognition and spoken-language understanding.
                 In our experiment, we show an error reduction of up to
                 9.7\% and 16.8\% of word error rate, and 5.5\% and
                 7.9\% of understanding error for the air travel and
                 telebanking service domains.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  keywords =     "automatic speech recognition; error-corrective
                 reranking; improving spoken dialogue system;
                 spoken-language understanding",
}

@Article{Kuo:2008:MSG,
  author =       {June-Jei Kuo and Hsin-Hsi Chen},
  title =        {Multidocument Summary Generation: Using
                 Informative and Event Words},
  journal =      j-TALIP,
  volume =       {7},
  number =       {1},
  pages =        {3:1--3:??},
  month =        feb,
  year =         {2008},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1330291.1330294},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:12:10 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {Summary generation for multiple documents poses a
                 number of issues including sentence selection, sentence
                 ordering, and sentence reduction over single-document
                 summarization. In addition, the temporal resolution
                 among extracted sentences is also important. This
                 article considers informative words and event words to
                 deal with multidocument summarization. These words
                 indicate the important concepts and relationships in a
                 document or among a set of documents, and can be used
                 to select salient sentences. We present a temporal
                 resolution algorithm, using focusing time and
                 coreference chains, to convert Chinese temporal
                 expressions in a document into calendrical forms.
                 Moreover, we consider the last calendrical form of a
                 sentence as a sentence time stamp to address sentence
                 ordering. Informative words, event words, and temporal
                 words are introduced to a sentence reduction algorithm,
                 which deals with both length constraints and
                 information coverage. Experiments on Chinese-news data
                 sets show significant improvements of both information
                 coverage and readability.},
  acknowledgement = ack-nhfb,
  articleno =    {3},
  keywords =     {latent semantic analysis; multidocument summary
                 generation; sentence ordering; sentence reduction;
                 sentence selection; temporal processing},
}

@Article{Kando:2008:INS,
  author =       {Noriko Kando and Teruko Mitamura and Tetsuya Sakai},
  title =        {Introduction to the {NTCIR-6 Special Issue}},
  journal =      j-TALIP,
  volume =       {7},
  number =       {2},
  pages =        {4:1--4:??},
  month =        jun,
  year =         {2008},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1362782.1362783},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:12:23 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
  articleno =    {4},
}

@Article{Zhou:2008:HTE,
  author =       {Dong Zhou and Mark Truran and Tim Brailsford and Helen
                 Ashman},
  title =        {A Hybrid Technique for {English--Chinese} Cross
                 Language Information Retrieval},
  journal =      j-TALIP,
  volume =       {7},
  number =       {2},
  pages =        {5:1--5:??},
  month =        jun,
  year =         {2008},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1362782.1362784},
  ISSN =         {1530-0226},
  bibdate =      {Mon Jun 16 17:12:23 MDT 2008},
  bibsource =    {http://portal.acm.org/},
  abstract =     {In this article we describe a hybrid technique for
                 dictionary-based query translation suitable for
                 English-Chinese cross language information retrieval.
                 This technique marries a graph-based model for the
                 resolution of candidate term ambiguity with a
                 pattern-based method for the translation of
                 out-of-vocabulary (OOV) terms. We evaluate the
                 performance of this hybrid technique in an experiment
                 using several NTCIR test collections. Experimental
                 results indicate a substantial increase in retrieval
                 effectiveness over various baseline systems
                 incorporating machine- and dictionary-based
                 translation.},
  acknowledgement = ack-nhfb,
  articleno =    {5},
  keywords =     {cross language information retrieval; disambiguation;
                 graph-based analysis; patterns; unknown term
                 translation},
}

@article{Higashinaka:2008:AAC,
  author       = {Ryuichiro Higashinaka and Hideki Isozaki},
  title        = {Automatically Acquiring Causal Expression Patterns
                 from Relation-annotated Corpora to Improve Question
                 Answering for why-Questions},
  journal      = j-TALIP,
  year         = {2008},
  month        = jun,
  volume       = {7},
  number       = {2},
  pages        = {6:1--6:??},
  articleno    = {6},
  doi          = {http://doi.acm.org/10.1145/1362782.1362785},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {causal expression; pattern mining; question answering;
                 relation-annotated corpus},
  abstract     = {This article describes our approach for answering
                 why-questions that we initially introduced at NTCIR-6
                 QAC-4. The approach automatically acquires causal
                 expression patterns from relation-annotated corpora by
                 abstracting text spans annotated with a causal relation
                 and by mining syntactic patterns that are useful for
                 distinguishing sentences annotated with a causal
                 relation from those annotated with other relations. We
                 use these automatically acquired causal expression
                 patterns to create features to represent answer
                 candidates, and use these features together with other
                 possible features related to causality to train an
                 answer candidate ranker that maximizes the QA
                 performance with regards to the corpus of why-questions
                 and answers. NAZEQA, a Japanese why-QA system based on
                 our approach, clearly outperforms baselines with a Mean
                 Reciprocal Rank (top-5) of 0.223 when sentences are
                 used as answers and with a MRR (top-5) of 0.326 when
                 paragraphs are used as answers, making it presumably
                 the best-performing fully implemented why-QA system.
                 Experimental results also verified the usefulness of
                 the automatically acquired causal expression
                 patterns.},
  bibdate      = {Mon Jun 16 17:12:23 MDT 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@Article{Li:2008:ASV,
  author =       "Yaoyong Li and Kalina Bontcheva",
  title =        "Adapting Support Vector Machines for {$F$}-term-based
                 Classification of Patents",
  journal =      j-TALIP,
  volume =       "7",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1362782.1362786",
  ISSN =         "1530-0226",
  bibdate =      "Mon Jun 16 17:12:23 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Support Vector Machines (SVM) have obtained
                 state-of-the-art results on many applications including
                 document classification. However, previous works on
                 applying SVMs to the $F$-term patent classification task
                 did not obtain as good results as other learning
                 algorithms such as k-NN. This is due to the fact that
                 $F$-term patent classification is different from
                 conventional document classification in several
                 aspects, mainly because it is a multiclass, multilabel
                 classification problem with semi-structured documents
                 and multi-faceted hierarchical categories.\par

                 This article describes our SVM-based system and several
                 techniques we developed successfully to adapt SVM for
                 the specific features of the $F$-term patent
                 classification task. We evaluate the techniques using
                 the NTCIR-6 $F$-term classification terms assigned to
                 Japanese patents. Moreover, our system participated in
                 the NTCIR-6 patent classification evaluation and
                 obtained the best results according to two of the three
                 metrics used for task performance evaluation. Following
                 the NTCIR-6 participation, we developed two new
                 techniques, which achieved even better scores using all
                 three NTCIR-6 metrics, effectively outperforming all
                 participating systems. This article presents this new
                 work and the experimental results that demonstrate the
                 benefits of the latest approach.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  keywords =     "F-term classification; patent processing; support
                 vector machines",
}

@Article{Fukumoto:2008:ICL,
  author =       "Fumiyo Fukumoto and Yoshimi Suzuki",
  title =        "Integrating Cross-Language Hierarchies and Its
                 Application to Retrieving Relevant Documents",
  journal =      j-TALIP,
  volume =       "7",
  number =       "3",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1386869.1386870",
  ISSN =         "1530-0226",
  bibdate =      "Fri Aug 22 13:11:51 MDT 2008",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Internet directories such as Yahoo! are an approach to
                 improve the efficacy and efficiency of Information
                 Retrieval (IR) on the Web, as pages (documents) are
                 organized into hierarchical categories, and similar
                 pages are grouped together. Most of the search engines
                 on the Web service find documents that are assigned to
                 a single classification hierarchy. Categories in the
                 hierarchy are carefully defined by human experts and
                 documents are well organized. However, a single
                 hierarchy in one language is often insufficient to find
                 all relevant material, as each hierarchy tends to have
                 some bias in both defining hierarchical structure and
                 classifying documents. Moreover, documents written in a
                 language other than the user's native language often
                 include large amounts of information related to the
                 user's request. In this article, we propose a method of
                 integrating cross-language (CL) category hierarchies,
                 that is, Reuters '96 hierarchy and UDC code hierarchy
                 of Japanese by estimating category similarities. The
                 method does not simply merge two different hierarchies
                 into one large hierarchy but instead extracts sets of
                 similar categories, where each element of the sets is
                 relevant with each other. It consists of three steps.
                 First, we classify documents from one hierarchy into
                 categories with another hierarchy using a
                 cross-language text classification (CLTC) technique,
                 and extract category pairs of two hierarchies. Next, we
                 apply $\chi^2$ statistics to these pairs to
                 obtain similar category pairs, and finally we apply the
                 generating function of the Apriori algorithm
                 (Apriori-Gen) to the category pairs, and find sets of
                 similar categories. Moreover, we examined whether
                 integrating hierarchies helps to support retrieval of
                 documents with similar contents. The retrieval results
                 showed a 42.7\% improvement over the baseline
                 nonhierarchy model, and a 21.6\% improvement over a
                 single hierarchy.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  keywords =     "cross-language hierarchies; information integration;
                 retrieval of relevant documents; text classification",
}

@article{Sharma:2008:AMI,
  author       = {Utpal Sharma and Jugal K. Kalita and Rajib K. Das},
  title        = {Acquisition of Morphology of an {Indic} Language
                 from Text Corpus},
  journal      = j-TALIP,
  year         = {2008},
  month        = aug,
  volume       = {7},
  number       = {3},
  pages        = {9:1--9:??},
  articleno    = {9},
  doi          = {http://doi.acm.org/10.1145/1386869.1386871},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {Assamese; Indo-European languages; machine learning;
                 morphology},
  abstract     = {This article describes an approach to unsupervised
                 learning of morphology from an unannotated corpus for a
                 highly inflectional Indo-European language called
                 Assamese spoken by about 30 million people. Although
                 Assamese is one of India's national languages, it
                 utterly lacks computational linguistic resources. There
                 exists no prior computational work on this language
                 spoken widely in northeast India. The work presented is
                 pioneering in this respect. In this article, we discuss
                 salient issues in Assamese morphology where the
                 presence of a large number of suffixal determiners,
                 sandhi, samas, and the propensity to use suffix
                 sequences make approximately 50\% of the words used in
                 written and spoken text inflected. We implement methods
                 proposed by Gaussier and Goldsmith on acquisition of
                 morphological knowledge, and obtain F-measure
                 performance below 60\%. This motivates us to present a
                 method more suitable for handling suffix sequences,
                 enabling us to increase the F-measure performance of
                 morphology acquisition to almost 70\%. We describe how
                 we build a morphological dictionary for Assamese from
                 the text corpus. Using the morphological knowledge
                 acquired and the morphological dictionary, we are able
                 to process small chunks of data at a time as well as a
                 large corpus. We achieve approximately 85\% precision
                 and recall during the analysis of small chunks of
                 coherent text.},
  bibdate      = {Fri Aug 22 13:11:51 MDT 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Chen:2008:TTR,
  author       = {Jiang-Chun Chen and Jyh-Shing Roger Jang},
  title        = {{TRUES}: {Tone Recognition Using Extended Segments}},
  journal      = j-TALIP,
  year         = {2008},
  month        = aug,
  volume       = {7},
  number       = {3},
  pages        = {10:1--10:??},
  articleno    = {10},
  doi          = {http://doi.acm.org/10.1145/1386869.1386872},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {context-dependent tone modeling; continuous tone
                 recognition; extended segment for tone recognition;
                 HMM; Mandarin Chinese; supratone modeling},
  abstract     = {Tone recognition has been a basic but important task
                 for speech recognition and assessment of tonal
                 languages, such as Mandarin Chinese. Most previously
                 proposed approaches adopt a two-step approach where
                 syllables within an utterance are identified via forced
                 alignment first, and tone recognition using a variety
                 of classifiers---such as neural networks, Gaussian
                 mixture models (GMM), hidden Markov models (HMM),
                 support vector machines (SVM)---is then performed on
                 each segmented syllable to predict its tone. However,
                 forced alignment does not always generate accurate
                 syllable boundaries, leading to unstable
                 voiced-unvoiced detection and deteriorating performance
                 in tone recognition. Aiming to alleviate this problem,
                 we propose a robust approach called Tone Recognition
                 Using Extended Segments (TRUES) for HMM-based
                 continuous tone recognition. The proposed approach
                 extracts an unbroken pitch contour from a given
                 utterance based on dynamic programming over time-domain
                 acoustic features of average magnitude difference
                 function (AMDF). The pitch contour of each syllable is
                 then extended for tri-tone HMM modeling, such that the
                 influence from inaccurate syllable boundaries is
                 lessened. Our experimental results demonstrate that the
                 proposed TRUES achieves 49.13\% relative error rate
                 reduction over that of the recently proposed supratone
                 modeling, which is deemed the state of the art of tone
                 recognition that outperforms several previously
                 proposed approaches. The encouraging improvement
                 demonstrates the effectiveness and robustness of the
                 proposed TRUES, as well as the corresponding pitch
                 determination algorithm which produces unbroken pitch
                 contours.},
  bibdate      = {Fri Aug 22 13:11:51 MDT 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Lin:2008:VCD,
  author       = {Jeng-Wei Lin and Jan-Ming Ho and Li-Ming Tseng and
                 Feipei Lai},
  title        = {Variant {Chinese} Domain Name Resolution},
  journal      = j-TALIP,
  year         = {2008},
  month        = nov,
  volume       = {7},
  number       = {4},
  pages        = {11:1--11:??},
  articleno    = {11},
  doi          = {http://doi.acm.org/10.1145/1450295.1450296},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {conversion between traditional Chinese and simplified
                 Chinese; Han character folding; Han character variant;
                 IDN spoof; internationalized domain name;
                 localization},
  abstract     = {Many efforts in past years have been made to lower the
                 linguistic barriers for non-native English speakers to
                 access the Internet. Internet standard RFC 3490,
                 referred to as IDNA (Internationalizing Domain Names in
                 Applications), focuses on access to IDNs
                 (Internationalized Domain Names) in a range of scripts
                 that is broader in scope than the original ASCII.
                 However, the use of character variants that have
                 similar appearances and/or interpretations could create
                 confusion. A variant IDL (Internationalized Domain
                 Label), derived from an IDL by replacing some
                 characters with their variants, should match the
                 original IDL; and thus a variant IDN does. In RFC 3743,
                 referred to as JET (Joint Engineering Team) Guidelines,
                 it is suggested that zone administrators model this
                 concept of equivalence as an atomic IDL package. When
                 an IDL is registered, an IDL package is created that
                 contains its variant IDLs generated according to the
                 zone-specific Language Variant Tables (LVTs). In
                 addition to the registered IDL, the name holder can
                 request the domain registry to activate some of the
                 variant IDLs, free or by an extra fee. The activated
                 variant IDLs are stored in the zone files, and thus
                 become resolvable. However, an issue of scalability
                 arises when there is a large number of variant IDLs to
                 be activated.\par

                 In this article, the authors present a resolution
                 protocol that resolves the variant IDLs into the
                 registered IDL, specifically for Han character
                 variants. Two Han characters are said to be variants of
                 each other if they have the same meaning and are
                 pronounced the same. Furthermore, Han character
                 variants usually have similar appearances. It is not
                 uncommon that a Chinese IDL has a large number of
                 variant IDLs. The proposed protocol introduces a new RR
                 (resource record) type, denoted as VarIdx RR, to
                 associate a variant expression of the variant IDLs with
                 the registered IDL. The label of the VarIdx RR, denoted
                 as the variant index, is assigned by an indexing
                 function that is designed to give the same value to all
                 of the variant IDLs enumerated by the variant
                 expression. When one of the variant IDLs is accessed,
                 Internet applications can compute the variant index,
                 look up the VarIdx RRs, and resolve the variant IDL
                 into the registered IDL.\par

                 The authors examine two sets of Chinese IDLs registered
                 in TWNIC and CNNIC, respectively. The results show that
                 for a registered Chinese IDL, a very small number of
                 VarIdx RRs, usually one or two, are sufficient to
                 activate all of its variant IDLs. The authors also
                 represent a Web redirection service that employs the
                 proposed resolution protocol to redirect a URL
                 addressed by a variant IDN to the URL addressed by the
                 registered IDN. The experiment results show that the
                 proposed protocol successfully resolves the variant
                 IDNs into the registered IDNs.},
  bibdate      = {Mon Dec 8 13:56:10 MST 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Lee:2008:BCQ,
  author       = {Cheng-Wei Lee and Min-Yuh Day and Cheng-Lung Sung and
                 Yi-Hsun Lee and Tian-Jian Jiang and Chia-Wei Wu and
                 Cheng-Wei Shih and Yu-Ren Chen and Wen-Lian Hsu},
  title        = {Boosting {Chinese} Question Answering with Two
                 Lightweight Methods: {ABSPs} and {SCO-QAT}},
  journal      = j-TALIP,
  year         = {2008},
  month        = nov,
  volume       = {7},
  number       = {4},
  pages        = {12:1--12:??},
  articleno    = {12},
  doi          = {http://doi.acm.org/10.1145/1450295.1450297},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {answer filtering; answer ranking; Chinese question
                 answering; co-occurrence; lightweight method; surface
                 pattern},
  abstract     = {Question Answering (QA) research has been conducted in
                 many languages. Nearly all the top performing systems
                 use heavy methods that require sophisticated
                 techniques, such as parsers or logic provers. However,
                 such techniques are usually unavailable or unaffordable
                 for under-resourced languages or in resource-limited
                 situations. In this article, we describe how a
                 top-performing Chinese QA system can be designed by
                 using lightweight methods effectively. We propose two
                 lightweight methods, namely the Sum of Co-occurrences
                 of Question and Answer Terms (SCO-QAT) and
                 Alignment-based Surface Patterns (ABSPs). SCO-QAT is a
                 co-occurrence-based answer-ranking method that does not
                 need extra knowledge, word-ignoring heuristic rules, or
                 tools. It calculates co-occurrence scores based on the
                 passage retrieval results. ABSPs are syntactic patterns
                 trained from question-answer pairs with a multiple
                 alignment algorithm. They are used to capture the
                 relations between terms and then use the relations to
                 filter answers. We attribute the success of the ABSPs
                 and SCO-QAT methods to the effective use of local
                 syntactic information and global co-occurrence
                 information.\par

                 By using SCO-QAT and ABSPs, we improved the RU-Accuracy
                 of our testbed QA system, ASQA, from 0.445 to 0.535 on
                 the NTCIR-5 dataset. It also achieved the top 0.5
                 RU-Accuracy on the NTCIR-6 dataset. The result shows
                 that lightweight methods are not only cheaper to
                 implement, but also have the potential to achieve
                 state-of-the-art performances.},
  bibdate      = {Mon Dec 8 13:56:10 MST 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Che:2008:UHC,
  author       = {Wanxiang Che and Min Zhang and AiTi Aw and ChewLim Tan
                 and Ting Liu and Sheng Li},
  title        = {Using a Hybrid Convolution Tree Kernel for Semantic
                 Role Labeling},
  journal      = j-TALIP,
  year         = {2008},
  month        = nov,
  volume       = {7},
  number       = {4},
  pages        = {13:1--13:??},
  articleno    = {13},
  doi          = {http://doi.acm.org/10.1145/1450295.1450298},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {hybrid convolution tree kernel; semantic role
                 labeling},
  abstract     = {As a kind of Shallow Semantic Parsing, Semantic Role
                 Labeling (SRL) is gaining more attention as it benefits
                 a wide range of natural language processing
                 applications. Given a sentence, the task of SRL is to
                 recognize semantic arguments (roles) for each predicate
                 (target verb or noun). Feature-based methods have
                 achieved much success in SRL and are regarded as the
                 state-of-the-art methods for SRL. However, these
                 methods are less effective in modeling structured
                 features. As an extension of feature-based methods,
                 kernel-based methods are able to capture structured
                 features more efficiently in a much higher dimension.
                 Application of kernel methods to SRL has been achieved
                 by selecting the tree portion of a predicate and one of
                 its arguments as feature space, which is named as
                 predicate-argument feature (PAF) kernel. The PAF kernel
                 captures the syntactic tree structure features using
                 convolution tree kernel, however, it does not
                 distinguish between the path structure and the
                 constituent structure. In this article, a hybrid
                 convolution tree kernel is proposed to model different
                 linguistic objects. The hybrid convolution tree kernel
                 consists of two individual convolution tree kernels.
                 They are a Path kernel, which captures
                 predicate-argument link features, and a Constituent
                 Structure kernel, which captures the syntactic
                 structure features of arguments. Evaluations on the
                 data sets of the CoNLL-2005 SRL shared task and the
                 Chinese PropBank (CPB) show that our proposed hybrid
                 convolution tree kernel statistically significantly
                 outperforms the previous tree kernels. Moreover, in
                 order to maximize the system performance, we present a
                 composite kernel through combining our hybrid
                 convolution tree kernel method with a feature-based
                 method extended by the polynomial kernel. The
                 experimental results show that the composite kernel
                 achieves better performance than each of the individual
                 methods and outperforms the best reported system on the
                 CoNLL-2005 corpus when only one syntactic parser is
                 used and on the CPB corpus when automated syntactic
                 parse results and correct syntactic parse results are
                 used respectively.},
  bibdate      = {Mon Dec 8 13:56:10 MST 2008},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Wu:2009:ISI,
  author       = {Chung-Hsien Wu and Haizhou Li},
  title        = {Introduction to the Special Issue on Recent
                 Advances in {Asian} Language Spoken Document
                 Retrieval},
  journal      = j-TALIP,
  year         = {2009},
  month        = mar,
  volume       = {8},
  number       = {1},
  pages        = {1:1--1:??},
  articleno    = {1},
  doi          = {http://doi.acm.org/10.1145/1482343.1482344},
  issn         = {1530-0226},
  coden        = {????},
  bibdate      = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Chen:2009:WTM,
  author       = {Berlin Chen},
  title        = {Word Topic Models for Spoken Document Retrieval
                 and Transcription},
  journal      = j-TALIP,
  year         = {2009},
  month        = mar,
  volume       = {8},
  number       = {1},
  pages        = {2:1--2:??},
  articleno    = {2},
  doi          = {http://doi.acm.org/10.1145/1482343.1482345},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {adaptation; information retrieval; language model;
                 speech recognition; word topic model},
  abstract     = {Statistical language modeling (LM), which aims to
                 capture the regularities in human natural language and
                 quantify the acceptability of a given word sequence,
                 has long been an interesting yet challenging research
                 topic in the speech and language processing community.
                 It also has been introduced to information retrieval
                 (IR) problems, and provided an effective and
                 theoretically attractive probabilistic framework for
                 building IR systems. In this article, we propose a word
                 topic model (WTM) to explore the co-occurrence
                 relationship between words, as well as the long-span
                 latent topical information, for language modeling in
                 spoken document retrieval and transcription. The
                 document or the search history as a whole is modeled as
                 a composite WTM model for generating a newly observed
                 word. The underlying characteristics and different
                 kinds of model structures are extensively investigated,
                 while the performance of WTM is thoroughly analyzed and
                 verified by comparison with the well-known
                 probabilistic latent semantic analysis (PLSA) model as
                 well as the other models. The IR experiments are
                 performed on the TDT Chinese collections (TDT-2 and
                 TDT-3), while the large vocabulary continuous speech
                 recognition (LVCSR) experiments are conducted on the
                 Mandarin broadcast news collected in Taiwan.
                 Experimental results seem to indicate that WTM is a
                 promising alternative to the existing models.},
  bibdate      = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Lin:2009:CSP,
  author       = {Shih-Hsiang Lin and Berlin Chen and Hsin-Min Wang},
  title        = {A Comparative Study of Probabilistic Ranking
                 Models for {Chinese} Spoken Document Summarization},
  journal      = j-TALIP,
  year         = {2009},
  month        = mar,
  volume       = {8},
  number       = {1},
  pages        = {3:1--3:??},
  articleno    = {3},
  doi          = {http://doi.acm.org/10.1145/1482343.1482346},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {extractive summarization; probabilistic ranking
                 models; relevance information; spoken document
                 summarization},
  abstract     = {Extractive document summarization automatically
                 selects a number of indicative sentences, passages, or
                 paragraphs from an original document according to a
                 target summarization ratio, and sequences them to form
                 a concise summary. In this article, we present a
                 comparative study of various probabilistic ranking
                 models for spoken document summarization, including
                 supervised classification-based summarizers and
                 unsupervised probabilistic generative summarizers. We
                 also investigate the use of unsupervised summarizers to
                 improve the performance of supervised summarizers when
                 manual labels are not available for training the
                 latter. A novel training data selection approach that
                 leverages the relevance information of spoken sentences
                 to select reliable document-summary pairs derived by
                 the probabilistic generative summarizers is explored
                 for training the classification-based summarizers.
                 Encouraging initial results on Mandarin Chinese
                 broadcast news data are demonstrated.},
  bibdate      = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

@article{Chen:2009:TSH,
  author       = {Boxing Chen and Min Zhang and Ai Ti Aw},
  title        = {Two-Stage Hypotheses Generation for Spoken Language
                 Translation},
  journal      = j-TALIP,
  year         = {2009},
  month        = mar,
  volume       = {8},
  number       = {1},
  pages        = {4:1--4:??},
  articleno    = {4},
  doi          = {http://doi.acm.org/10.1145/1482343.1482347},
  issn         = {1530-0226},
  coden        = {????},
  keywords     = {hypotheses generation; spoken language translation;
                 statistical machine translation},
  abstract     = {Spoken Language Translation (SLT) is the research area
                 that focuses on the translation of speech or text
                 between two spoken languages. Phrase-based and
                 syntax-based methods represent the state-of-the-art for
                 statistical machine translation (SMT). The phrase-based
                 method specializes in modeling local reorderings and
                 translations of multiword expressions. The syntax-based
                 method is enhanced by using syntactic knowledge, which
                 can better model long word reorderings, discontinuous
                 phrases, and syntactic structure. In this article, we
                 leverage on the strength of these two methods and
                 propose a strategy based on multiple hypotheses
                 generation in a two-stage framework for spoken language
                 translation. The hypotheses are generated in two
                 stages, namely, decoding and regeneration. In the
                 decoding stage, we apply state-of-the-art,
                 phrase-based, and syntax-based methods to generate
                 basic translation hypotheses. Then in the regeneration
                 stage, much more hypotheses that cannot be captured by
                 the decoding algorithms are produced from the basic
                 hypotheses. We study three regeneration methods:
                 redecoding, n-gram expansion, and confusion network in
                 the second stage. Finally, an additional reranking pass
                 is introduced to select the translation outputs by a
                 linear combination of rescoring models. Experimental
                 results on the Chinese-to-English IWSLT-2006 challenge
                 task of translating the transcription of spontaneous
                 speech show that the proposed mechanism achieves
                 significant improvements over the baseline of about
                 2.80 BLEU-score.},
  bibdate      = {Mon Mar 23 16:32:22 MDT 2009},
  bibsource    = {http://portal.acm.org/},
  acknowledgement = ack-nhfb,
}

%%% Introduction article for the special issue in TALIP volume 8,
%%% number 2 (article 5).  The record carries no abstract or keywords.
%%% The DOI is stored as a URL through the publisher-neutral doi.org
%%% resolver.  The ?? in pages presumably marks an unknown final page.
%%% NOTE(review): the title ends in the singular "Language"; confirm
%%% against the publisher's record before changing it.
@Article{Chiang:2009:ISI,
  author =       "David Chiang and Philipp Koehn",
  title =        "Introduction to the Special Issue on Machine
                 Translation of {Asian} Language",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526253",
  ISSN =         "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
  articleno =    "5",
}

%%% TALIP volume 8, number 2, article 6: hypothesis alignment for
%%% machine translation system combination.  The DOI is stored as a
%%% URL through the publisher-neutral doi.org resolver.  The ?? in
%%% pages presumably marks an unknown final page.
@Article{He:2009:IMH,
  author =       "Xiaodong He and Mei Yang and Jianfeng Gao and Patrick
                 Nguyen and Robert Moore",
  title =        "Improved Monolingual Hypothesis Alignment for
                 Machine Translation System Combination",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526254",
  ISSN =         "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "This article presents a new hypothesis alignment
                 method for combining outputs of multiple machine
                 translation (MT) systems. An indirect hidden Markov
                 model (IHMM) is proposed to address the synonym
                 matching and word ordering issues in hypothesis
                 alignment. Unlike traditional HMMs whose parameters are
                 trained via maximum likelihood estimation (MLE), the
                 parameters of the IHMM are estimated indirectly from a
                 variety of sources including word semantic similarity,
                 word surface similarity, and a distance-based
                 distortion penalty. The IHMM-based method significantly
                 outperforms the state-of-the-art, TER-based alignment
                 model in our experiments on NIST benchmark datasets.
                 Our combined SMT system using the proposed method
                 achieved the best Chinese-to-English translation result
                 in the constrained training track of the 2008 NIST Open
                 MT Evaluation.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  keywords =     "hidden Markov model; statistical machine translation;
                 system combination; word alignment",
}

%%% TALIP volume 8, number 2, article 7: word segmentation for
%%% phrase-based statistical machine translation.  The DOI is stored
%%% as a URL through the publisher-neutral doi.org resolver.  The ??
%%% in pages presumably marks an unknown final page.
@Article{Ma:2009:BMW,
  author =       "Yanjun Ma and Andy Way",
  title =        "Bilingually Motivated Word Segmentation for
                 Statistical Machine Translation",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526255",
  ISSN =         "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "We introduce a bilingually motivated word segmentation
                 approach to languages where word boundaries are not
                 orthographically marked, with application to
                 Phrase-Based Statistical Machine Translation (PB-SMT).
                 Our approach is motivated from the insight that PB-SMT
                 systems can be improved by optimizing the input
                 representation to reduce the predictive power of
                 translation models. We firstly present an approach to
                 optimize the existing segmentation of both source and
                 target languages for PB-SMT and demonstrate the
                 effectiveness of this approach using a Chinese--English
                 MT task, that is, to measure the influence of the
                 segmentation on the performance of PB-SMT systems. We
                 report a 5.44\% relative increase in Bleu score and a
                 consistent increase according to other metrics. We then
                 generalize this method for Chinese word segmentation
                 without relying on any segmenters and show that using
                 our segmentation PB-SMT can achieve more consistent
                 state-of-the-art performance across two domains. There
                 are two main advantages of our approach. First of all,
                 it is adapted to the specific translation task at hand
                 by taking the corresponding source (target) language
                 into account. Second, this approach does not rely on
                 manually segmented training data so that it can be
                 automatically adapted for different domains.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  keywords =     "alignment; bilingually motivated; phrase-based
                 statistical machine translation; word segmentation",
}

%%% TALIP volume 8, number 2, article 8: global lexical selection for
%%% machine translation.  The DOI is stored as a URL through the
%%% publisher-neutral doi.org resolver.  The ?? in pages presumably
%%% marks an unknown final page.
@Article{Venkatapathy:2009:DMT,
  author =       "Sriram Venkatapathy and Srinivas Bangalore",
  title =        "Discriminative Machine Translation Using Global
                 Lexical Selection",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526256",
  ISSN =         "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "Statistical phrase-based machine translation models
                 crucially rely on word alignments. The search for
                 word-alignments assumes a model of word locality
                 between source and target languages that is violated in
                 starkly different word-order languages such as
                 English-Hindi. In this article, we present models that
                 decouple the steps of lexical selection and lexical
                 reordering with the aim of minimizing the role of
                 word-alignment in machine translation. Indian languages
                 are morphologically rich and have relatively free-word
                 order where the grammatical role of content words is
                 largely determined by their case markers and not just
                 by their positions in the sentence. Hence, lexical
                 selection plays a far greater role than lexical
                 reordering. For lexical selection, we investigate
                 models that take the entire source sentence into
                 account and evaluate their performance for
                 English-Hindi translation in a tourism domain.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  keywords =     "global lexical selection; machine translation",
}

%%% TALIP volume 8, number 2, article 9: building a Chinese-Japanese
%%% bilingual lexicon through a pivot language.  The DOI is stored as
%%% a URL through the publisher-neutral doi.org resolver.  The ?? in
%%% pages presumably marks an unknown final page.
@Article{Tsunakawa:2009:CJL,
  author =       "Takashi Tsunakawa and Naoaki Okazaki and Xiao Liu and
                 Jun'ichi Tsujii",
  title =        "A {Chinese--Japanese} Lexical Machine Translation
                 through a Pivot Language",
  journal =      j-TALIP,
  volume =       "8",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1526252.1526257",
  ISSN =         "1530-0226",
  bibdate =      "Wed Jun 3 16:13:52 MDT 2009",
  bibsource =    "http://portal.acm.org/",
  abstract =     "The bilingual lexicon is an expensive but critical
                 resource for multilingual applications in natural
                 language processing. This article proposes an
                 integrated framework for building a bilingual lexicon
                 between the Chinese and Japanese languages. Since the
                 language pair Chinese-Japanese does not include
                 English, which is a central language of the world, few
                 large-scale bilingual resources between Chinese and
                 Japanese have been constructed. One solution to
                 alleviate this problem is to build a Chinese-Japanese
                 bilingual lexicon through English as the pivot
                 language. In addition to the pivotal approach, we can
                 make use of the characteristics of Chinese and Japanese
                 languages that use Han characters. We incorporate a
                 translation model obtained from a small
                 Chinese-Japanese lexicon and use the similarity of the
                 hanzi and kanji characters by using the log-linear
                 model. Our experimental results show that the use of
                 the pivotal approach can improve the translation
                 performance over the translation model built from a
                 small Chinese-Japanese lexicon. The results also
                 demonstrate that the similarity between the hanzi and
                 kanji characters provides a positive effect for
                 translating technical terms.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  keywords =     "bilingual lexicon; Han characters; hanzi; kanji; pivot
                 language; statistical machine translation",
}