Entry Kuo:2007:PSM from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@Article{Kuo:2007:PSM,
  author =       "Jin-Shea Kuo and Haizhou Li and Ying-Kuei Yang",
  title =        "A phonetic similarity model for automatic extraction
                 of transliteration pairs",
  journal =      j-TALIP,
  volume =       "6",
  number =       "2",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1282080.1282081",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Jun 16 17:11:28 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "This article proposes an approach for the automatic
                 extraction of transliteration pairs from Chinese Web
                 corpora. In this approach, we formulate the machine
                 transliteration process using a syllable-based phonetic
                 similarity model which consists of phonetic confusion
                 matrices and a Chinese character $n$-gram language
                 model. With the phonetic similarity model, the
                 extraction of transliteration pairs becomes a two-step
                 process of recognition followed by validation: First,
                 in the recognition process, we identify the most
                 probable transliteration in the $k$-neighborhood of a
                 recognized English word. Then, in the validation
                 process, we qualify the transliteration pair candidates
                 with a hypothesis test. We carry out an analytical
                 study on the statistics of several key factors in
                 English--Chinese transliteration to help formulate
                 phonetic similarity modeling. We then conduct both
                 supervised and unsupervised learning of a phonetic
                 similarity model on a development database. The
                 experimental results validate the effectiveness of the
                 phonetic similarity model by achieving an $F$-measure
                 of 0.739 in supervised learning. The unsupervised
                 learning approach works almost as well as the
                 supervised one, thus allowing us to deploy automatic
                 extraction of transliteration pairs in the Web space.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "extraction of transliteration pairs; machine
                 translation; machine transliteration; phonetic
                 confusion probability; phonetic similarity modeling",
}

Related entries

$F$, 6(4)3, 7(2)7
achieving, 11(3)11, 13(3)12
almost, 7(3)9, 9(3)11, 9(3)12, 11(1)1, 13(1)1
article, 3(4)227, 4(3)321, 5(2)121, 6(2)7, 6(2)8, 6(4)3, 7(1)1, 7(1)3, 7(2)5, 7(2)6, 7(2)7, 7(3)8, 7(3)9, 7(4)11, 7(4)12, 7(4)13, 8(1)2, 8(1)3, 8(1)4, 8(2)6, 8(2)8, 8(2)9, 8(3)10, 8(3)11, 8(3)12, 8(4)14, 8(4)16, 8(4)17, 8(4)18, 9(1)2, 9(1)4, 9(2)6, 9(3)10, 9(3)11, 9(3)12, 9(4)13, 9(4)14, 10(1)3, 10(1)5, 10(1)6, 10(2)7, 10(2)9, 10(2)10, 10(3)12, 10(3)13, 10(3)14, 10(3)15, 10(4)17, 10(4)18, 10(4)20, 10(4)21, 11(1)1, 11(2)4, 11(2)5, 11(2)7, 11(3)8, 11(3)10, 11(3)11, 11(4)13, 11(4)14, 11(4)15, 11(4)16, 11(4)17, 11(4)18, 12(1)1, 12(1)3, 12(1)4, 12(2)5, 12(2)6, 12(2)7, 12(3)9, 12(3)10, 12(3)11, 12(3)12, 12(4)14, 13(1)1, 13(1)2, 13(1)3, 13(1)4, 13(2)6, 13(2)7, 13(2)8, 13(2)9, 13(3)12, 13(3)13, 13(4)16
automatic, 1(3)207, 3(1)51, 5(2)165, 7(1)2, 8(4)15, 8(4)16, 8(4)18, 9(1)2, 9(2)6, 9(2)7, 10(1)5, 10(2)7, 10(3)12, 10(3)13, 10(4)21, 12(2)7, 12(4)14, 12(4)15, 13(3)13
become, 7(4)11, 8(3)10
both, 6(2)7, 6(3)10, 7(1)1, 7(1)2, 7(1)3, 7(3)8, 8(2)7, 8(3)11, 8(3)12, 9(1)2, 9(1)4, 9(2)5, 10(1)2, 10(1)4, 10(1)6, 10(2)10, 10(3)13, 10(3)15, 10(4)20, 10(4)21, 11(2)4, 11(2)6, 11(2)7, 11(3)9, 11(4)18, 12(2)5, 12(2)7, 12(4)17, 13(2)9, 13(4)17
candidate, 5(2)121, 7(2)5, 7(2)6, 10(1)3, 10(3)16, 10(4)21, 11(2)6, 11(3)8, 12(3)9, 12(4)15, 13(1)2
carry, 8(3)12, 10(4)17
character, 1(3)269, 2(1)27, 6(2)8, 7(4)11, 8(2)9, 8(3)11, 9(4)14, 10(2)10, 11(2)7, 12(1)1, 12(1)2, 12(2)6, 12(3)9, 12(4)16, 13(2)6, 13(2)8, 13(3)12, 13(3)14, 13(4)18
Chinese, English-, 7(2)5, 10(4)19
conduct, 8(4)16, 10(4)20, 11(4)14, 13(2)7
confusion, 7(4)11, 8(1)4, 10(1)3
consist, 7(3)8, 7(4)13, 12(1)2
corpora, 5(2)89, 5(2)121, 6(3)11, 7(2)6, 9(2)5, 9(4)13, 10(3)15, 10(4)19, 10(4)21, 11(2)6, 11(2)7, 11(3)11, 12(2)7, 13(1)1, 13(1)3, 13(2)9, 13(3)11
database, 6(2)8, 9(1)1
development, 2(2)101, 2(3)290, 5(2)146, 9(1)4, 10(2)10, 12(2)6, 13(4)17
effectiveness, 6(2)8, 6(4)2, 7(2)5, 7(3)10, 8(2)7, 8(4)17, 9(2)5, 9(3)11, 9(3)12, 10(3)15, 11(2)5, 11(4)18, 12(4)16, 12(4)17, 13(2)6, 13(3)13, 13(4)18
English, 2(3)245, 4(2)135, 5(2)89, 5(2)121, 5(3)245, 6(2)7, 6(3)11, 6(4)2, 7(1)1, 7(4)11, 8(2)9, 8(4)15, 8(4)16, 8(4)17, 9(1)1, 9(1)3, 9(2)7, 9(3)12, 9(4)14, 9(4)15, 10(1)2, 10(1)4, 10(2)8, 10(3)14, 10(3)15, 10(4)17, 11(2)4, 11(2)5, 11(2)6, 11(3)8, 11(3)11, 12(2)5, 12(3)12, 12(4)14, 12(4)17, 13(1)1, 13(4)16
English--Chinese, 7(2)5, 10(4)19
experimental, 5(2)121, 6(2)7, 7(2)5, 7(2)6, 7(2)7, 7(3)10, 7(4)13, 8(1)2, 8(1)4, 8(2)9, 9(1)1, 9(1)2, 10(2)7, 11(3)9, 11(4)14, 12(3)10, 12(4)14, 13(2)7
extraction, 1(1)34, 2(3)295, 5(1)61, 5(2)121, 7(1)1, 8(4)16, 8(4)17, 10(3)14, 10(3)15, 11(1)2, 11(2)6, 12(3)11, 13(1)3
factor, 10(2)10, 11(3)10, 12(4)14
first, 5(2)165, 6(4)3, 7(1)1, 7(3)8, 7(3)10, 8(2)7, 8(3)10, 8(3)11, 8(4)19, 9(3)10, 9(3)11, 10(3)13, 11(2)6, 11(2)7, 11(3)8, 11(3)9, 11(3)11, 12(1)3, 12(1)4, 12(2)5, 12(3)10, 12(4)17, 13(2)7, 13(2)9, 13(3)13, 13(4)17
followed, 4(4)417, 6(3)10, 8(4)19, 13(1)1
gram, 3(2)113, 9(2)5, 9(2)7, 9(3)11, 11(2)7, 12(1)1, 12(4)15
help, 5(2)146, 7(3)8, 8(4)17, 8(4)18, 9(1)3, 9(2)7, 10(2)10, 11(3)9, 12(1)3, 12(4)15
hypothesis, 6(2)7, 8(2)6, 11(4)16
identify, 5(2)146, 8(3)11, 9(3)12, 10(1)5, 11(2)6, 11(4)18, 12(3)10
key, 7(1)2, 8(3)12, 9(3)11, 12(3)11, 13(4)18
learning, 3(2)159, 5(1)61, 5(2)121, 5(4)413, 6(4)1, 6(4)3, 7(2)7, 7(3)9, 8(3)10, 8(4)15, 9(1)3, 9(2)5, 10(1)5, 10(2)10, 10(3)16, 10(4)20, 10(4)21, 11(1)3, 11(4)14, 11(4)16, 12(1)1, 12(1)3, 12(2)5, 12(2)7, 12(4)15, 13(1)2, 13(1)3, 13(2)9, 13(4)16, 13(4)17
Li, Haizhou, 8(1)1
machine, 4(1)18, 4(4)377, 5(2)89, 5(3)185, 6(4)2, 6(4)3, 7(1)1, 7(2)5, 7(2)7, 7(3)9, 7(3)10, 8(1)4, 8(2)5, 8(2)6, 8(2)7, 8(2)8, 8(2)9, 8(3)10, 8(4)15, 9(1)3, 9(4)13, 10(1)2, 10(1)5, 10(3)16, 10(4)18, 10(4)20, 11(1)2, 11(3)8, 11(4)14, 11(4)16, 12(3)9, 12(3)12, 12(4)14, 12(4)16, 12(4)17, 13(1)2, 13(3)11, 13(4)16, 13(4)17
measure, 5(2)89, 6(4)3, 8(2)7, 9(2)7, 10(1)2, 10(1)6, 10(4)20, 11(2)6, 11(3)9, 11(3)11, 13(3)11, 13(3)13, 13(3)14
modeling, 1(1)3, 1(3)173, 3(2)87, 3(3)169, 6(1)z, 6(3)9, 7(3)10, 7(4)13, 8(1)2, 8(1)4, 9(4)14, 10(4)18, 10(4)21, 11(2)5, 12(2)5, 13(3)12, 13(4)16
most, 7(1)1, 7(3)8, 7(3)10, 8(4)15, 9(1)1, 9(2)5, 9(3)11, 10(1)5, 10(2)8, 12(1)1, 12(1)2, 13(1)1, 13(1)4, 13(2)6, 13(3)14, 13(4)18
n, 9(2)7, 11(2)7, 12(1)1
one, 5(2)89, 5(2)121, 6(3)9, 6(4)3, 7(3)8, 7(3)9, 7(4)11, 7(4)13, 8(2)9, 8(4)16, 8(4)17, 9(1)1, 9(2)5, 9(2)7, 9(3)12, 9(4)14, 10(1)5, 10(3)12, 10(3)13, 10(4)19, 11(2)4, 11(2)6, 11(2)7, 11(4)14, 12(1)1, 12(1)2, 12(2)5, 12(2)7, 12(3)11, 12(4)16, 13(1)4, 13(2)10, 13(4)17, 13(4)18
pair, 5(2)89, 5(2)121, 6(3)11, 7(1)1, 7(3)8, 7(4)12, 8(1)3, 8(2)9, 8(4)17, 10(4)19, 10(4)21, 11(3)11, 11(4)13, 11(4)18, 12(3)9, 12(4)14, 13(3)11
phonetic, 1(1)65, 2(1)63, 12(1)2
probability, 5(2)121, 8(4)19, 10(1)6, 10(3)16, 11(2)5, 11(2)6, 12(3)9, 13(4)18
probable, 13(3)14
process, 5(2)121, 6(3)10, 7(1)1, 7(3)9, 10(2)10, 10(4)18, 12(3)9, 13(1)4, 13(2)9
propose, 5(2)89, 6(2)8, 6(3)11, 7(3)8, 7(3)10, 7(4)12, 8(1)2, 8(1)4, 8(2)9, 8(4)19, 9(2)7, 9(4)13, 10(2)10, 10(3)12, 10(3)15, 10(4)17, 10(4)20, 11(2)7, 11(3)9, 11(3)11, 11(4)15, 11(4)16, 11(4)18, 12(1)1, 12(1)2, 12(1)3, 12(1)4, 12(2)5, 12(2)6, 12(3)9, 12(3)10, 12(3)12, 12(4)16, 13(1)2, 13(1)3, 13(2)8, 13(2)9, 13(3)12, 13(3)13, 13(4)17, 13(4)18
recognition, 1(1)83, 1(4)297, 2(1)27, 2(3)290, 5(1)4, 5(2)165, 6(3)9, 6(4)3, 7(1)2, 7(3)10, 8(1)2, 8(3)11, 8(4)18, 9(1)2, 9(2)7, 10(1)6, 10(2)7, 10(2)9, 10(3)13, 11(1)1, 11(1)2, 11(4)13, 11(4)16, 11(4)17, 11(4)18, 12(1)4, 12(3)10, 13(3)12, 13(4)16
recognized, 7(1)2, 10(2)7, 12(3)10
result, 4(2)135, 5(2)121, 5(2)146, 5(2)165, 6(2)7, 6(3)9, 6(3)11, 6(4)3, 7(1)2, 7(2)5, 7(2)6, 7(2)7, 7(3)8, 7(3)10, 7(4)11, 7(4)12, 7(4)13, 8(1)2, 8(1)3, 8(1)4, 8(2)6, 8(2)9, 8(3)10, 8(3)12, 8(4)14, 8(4)15, 8(4)16, 8(4)17, 8(4)18, 8(4)19, 9(1)1, 9(1)2, 9(2)5, 9(2)6, 9(2)7, 9(3)11, 9(3)12, 9(4)14, 10(1)2, 10(2)7, 11(2)4, 11(2)5, 11(3)8, 11(3)9, 11(3)11, 11(4)13, 11(4)14, 11(4)15, 12(1)3, 12(1)4, 12(2)5, 12(2)7, 12(3)9, 12(3)10, 12(3)11, 12(4)14, 12(4)16, 13(1)1, 13(1)4, 13(2)6, 13(2)7, 13(2)9, 13(3)11, 13(3)12, 13(3)14
several, 6(2)7, 6(4)3, 7(2)5, 7(2)7, 7(3)10, 8(3)10, 8(4)16, 8(4)17, 8(4)18, 9(3)12, 11(2)6, 11(4)13, 11(4)16, 12(1)2, 13(3)12, 13(3)14
similarity, 5(2)89, 5(2)165, 6(1)z-1, 7(3)8, 8(2)6, 8(2)9, 9(1)1, 10(2)10, 11(2)5, 11(2)6, 11(3)9, 11(3)11
space, 6(2)7, 7(4)13, 11(2)5, 11(3)9
statistics, 1(3)269, 5(4)413, 7(3)8
step, two-, 7(3)10, 12(3)12
study, 4(2)159, 4(3)243, 5(2)121, 5(2)146, 5(2)165, 5(3)209, 6(2)7, 8(1)3, 8(1)4, 8(4)16, 9(2)5, 9(2)6, 9(2)7, 9(3)11, 10(2)10, 10(3)12, 10(4)17, 10(4)18, 11(1)3, 11(2)6, 11(3)9, 11(3)11, 11(4)14, 13(1)3, 13(2)7, 13(3)11, 13(3)12, 13(3)14
supervised, 8(1)3, 8(3)10, 9(1)2, 9(1)4, 11(2)4, 13(1)3, 13(2)9
test, 7(2)5, 9(1)3, 9(3)10, 9(3)11, 11(4)13, 12(3)9, 13(1)4, 13(2)7, 13(3)13, 13(4)17
then, 5(2)121, 7(1)1, 7(3)10, 7(4)12, 8(1)4, 8(2)7, 8(3)10, 8(3)11, 8(3)12, 8(4)14, 9(1)1, 9(2)7, 9(3)11, 10(2)7, 10(3)13, 10(4)20, 11(1)3, 11(2)7, 11(3)11, 11(4)15, 12(1)3, 12(3)10, 12(4)17, 13(1)4, 13(2)9, 13(3)13, 13(4)16
thus, 7(4)11, 9(2)7, 10(2)9, 12(1)1, 12(4)15
transliteration, 5(2)121, 5(3)185, 6(1)z-1, 9(1)1, 9(4)13, 9(4)14, 10(4)19, 12(3)9
two-step, 7(3)10, 12(3)12
unsupervised, 7(3)9, 8(1)3, 9(1)3, 9(1)4, 9(2)7, 10(2)7, 11(3)9, 13(3)12
validate, 9(2)7
Web, 1(2)159, 3(1)66, 7(3)8, 7(4)11, 8(3)12, 9(1)1, 9(4)15, 10(4)21, 11(2)4, 11(3)11, 11(4)16
well, 5(2)121, 6(3)11, 7(3)8, 7(3)9, 7(3)10, 8(1)2, 8(4)18, 9(1)2, 9(3)12, 10(3)15, 11(1)2, 11(2)4, 11(3)11, 12(3)9, 13(1)2
which, 5(2)89, 5(2)121, 6(2)8, 6(3)10, 6(4)1, 7(1)3, 7(2)7, 7(3)10, 7(4)13, 8(1)2, 8(1)4, 8(2)9, 8(3)10, 8(3)12, 8(4)14, 8(4)16, 8(4)18, 9(1)1, 9(2)5, 9(3)12, 9(4)14, 9(4)15, 10(1)4, 10(1)6, 10(2)7, 10(2)8, 10(2)9, 10(3)12, 10(3)13, 10(3)14, 10(3)15, 10(4)17, 10(4)19, 10(4)20, 11(2)6, 11(3)8, 11(3)9, 11(4)13, 11(4)14, 11(4)15, 11(4)16, 11(4)18, 12(1)1, 12(1)3, 12(1)4, 12(2)5, 12(2)6, 12(2)7, 12(3)10, 12(3)11, 12(4)15, 12(4)17, 13(1)1, 13(2)8, 13(3)13, 13(4)17, 13(4)18
work, 5(2)121, 6(3)11, 6(4)2, 7(2)7, 7(3)9, 8(4)19, 9(2)5, 9(4)15, 10(1)4, 10(2)10, 12(1)3, 12(3)9, 13(1)1, 13(2)9, 13(3)14, 13(4)18