Entry Kuo:2007:PSM from talip.bib
Last update: Sun Oct 15 02:55:04 MDT 2017
Top |
Symbols |
Numbers |
Math |
A |
B |
C |
D |
E |
F |
G |
H |
I |
J |
K |
L |
M |
N |
O |
P |
Q |
R |
S |
T |
U |
V |
W |
X |
Y |
Z
BibTeX entry
@Article{Kuo:2007:PSM,
author = "Jin-Shea Kuo and Haizhou Li and Ying-Kuei Yang",
title = "A phonetic similarity model for automatic extraction
of transliteration pairs",
journal = j-TALIP,
volume = "6",
number = "2",
pages = "6:1--6:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1282080.1282081",
ISSN = "1530-0226 (print), 1558-3430 (electronic)",
ISSN-L = "1530-0226",
bibdate = "Mon Jun 16 17:11:28 MDT 2008",
bibsource = "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/talip.bib",
abstract = "This article proposes an approach for the automatic
extraction of transliteration pairs from Chinese Web
corpora. In this approach, we formulate the machine
transliteration process using a syllable-based phonetic
similarity model which consists of phonetic confusion
matrices and a Chinese character $n$-gram language
model. With the phonetic similarity model, the
extraction of transliteration pairs becomes a two-step
process of recognition followed by validation: First,
in the recognition process, we identify the most
probable transliteration in the $k$-neighborhood of a
recognized English word. Then, in the validation
process, we qualify the transliteration pair candidates
with a hypothesis test. We carry out an analytical
study on the statistics of several key factors in
English--Chinese transliteration to help formulate
phonetic similarity modeling. We then conduct both
supervised and unsupervised learning of a phonetic
similarity model on a development database. The
experimental results validate the effectiveness of the
phonetic similarity model by achieving an $F$-measure
of 0.739 in supervised learning. The unsupervised
learning approach works almost as well as the
supervised one, thus allowing us to deploy automatic
extraction of transliteration pairs in the Web space.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Asian Language Information
Processing",
journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J820",
keywords = "extraction of transliteration pairs; machine
translation; machine transliteration; phonetic
confusion probability; phonetic similarity modeling",
}
Related entries
- $F$,
6(4)3,
7(2)7
- achieving,
11(3)11,
13(3)12
- almost,
7(3)9,
9(3)11,
9(3)12,
11(1)1,
13(1)1
- article,
3(4)227,
4(3)321,
5(2)121,
6(2)7,
6(2)8,
6(4)3,
7(1)1,
7(1)3,
7(2)5,
7(2)6,
7(2)7,
7(3)8,
7(3)9,
7(4)11,
7(4)12,
7(4)13,
8(1)2,
8(1)3,
8(1)4,
8(2)6,
8(2)8,
8(2)9,
8(3)10,
8(3)11,
8(3)12,
8(4)14,
8(4)16,
8(4)17,
8(4)18,
9(1)2,
9(1)4,
9(2)6,
9(3)10,
9(3)11,
9(3)12,
9(4)13,
9(4)14,
10(1)3,
10(1)5,
10(1)6,
10(2)7,
10(2)9,
10(2)10,
10(3)12,
10(3)13,
10(3)14,
10(3)15,
10(4)17,
10(4)18,
10(4)20,
10(4)21,
11(1)1,
11(2)4,
11(2)5,
11(2)7,
11(3)8,
11(3)10,
11(3)11,
11(4)13,
11(4)14,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
12(1)1,
12(1)3,
12(1)4,
12(2)5,
12(2)6,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(3)12,
12(4)14,
13(1)1,
13(1)2,
13(1)3,
13(1)4,
13(2)6,
13(2)7,
13(2)8,
13(2)9,
13(3)12,
13(3)13,
13(4)16
- automatic,
1(3)207,
3(1)51,
5(2)165,
7(1)2,
8(4)15,
8(4)16,
8(4)18,
9(1)2,
9(2)6,
9(2)7,
10(1)5,
10(2)7,
10(3)12,
10(3)13,
10(4)21,
12(2)7,
12(4)14,
12(4)15,
13(3)13
- become,
7(4)11,
8(3)10
- both,
6(2)7,
6(3)10,
7(1)1,
7(1)2,
7(1)3,
7(3)8,
8(2)7,
8(3)11,
8(3)12,
9(1)2,
9(1)4,
9(2)5,
10(1)2,
10(1)4,
10(1)6,
10(2)10,
10(3)13,
10(3)15,
10(4)20,
10(4)21,
11(2)4,
11(2)6,
11(2)7,
11(3)9,
11(4)18,
12(2)5,
12(2)7,
12(4)17,
13(2)9,
13(4)17
- candidate,
5(2)121,
7(2)5,
7(2)6,
10(1)3,
10(3)16,
10(4)21,
11(2)6,
11(3)8,
12(3)9,
12(4)15,
13(1)2
- carry,
8(3)12,
10(4)17
- character,
1(3)269,
2(1)27,
6(2)8,
7(4)11,
8(2)9,
8(3)11,
9(4)14,
10(2)10,
11(2)7,
12(1)1,
12(1)2,
12(2)6,
12(3)9,
12(4)16,
13(2)6,
13(2)8,
13(3)12,
13(3)14,
13(4)18
- Chinese, English-,
7(2)5,
10(4)19
- conduct,
8(4)16,
10(4)20,
11(4)14,
13(2)7
- confusion,
7(4)11,
8(1)4,
10(1)3
- consist,
7(3)8,
7(4)13,
12(1)2
- corpora,
5(2)89,
5(2)121,
6(3)11,
7(2)6,
9(2)5,
9(4)13,
10(3)15,
10(4)19,
10(4)21,
11(2)6,
11(2)7,
11(3)11,
12(2)7,
13(1)1,
13(1)3,
13(2)9,
13(3)11
- database,
6(2)8,
9(1)1
- development,
2(2)101,
2(3)290,
5(2)146,
9(1)4,
10(2)10,
12(2)6,
13(4)17
- effectiveness,
6(2)8,
6(4)2,
7(2)5,
7(3)10,
8(2)7,
8(4)17,
9(2)5,
9(3)11,
9(3)12,
10(3)15,
11(2)5,
11(4)18,
12(4)16,
12(4)17,
13(2)6,
13(3)13,
13(4)18
- English,
2(3)245,
4(2)135,
5(2)89,
5(2)121,
5(3)245,
6(2)7,
6(3)11,
6(4)2,
7(1)1,
7(4)11,
8(2)9,
8(4)15,
8(4)16,
8(4)17,
9(1)1,
9(1)3,
9(2)7,
9(3)12,
9(4)14,
9(4)15,
10(1)2,
10(1)4,
10(2)8,
10(3)14,
10(3)15,
10(4)17,
11(2)4,
11(2)5,
11(2)6,
11(3)8,
11(3)11,
12(2)5,
12(3)12,
12(4)14,
12(4)17,
13(1)1,
13(4)16
- English--Chinese,
7(2)5,
10(4)19
- experimental,
5(2)121,
6(2)7,
7(2)5,
7(2)6,
7(2)7,
7(3)10,
7(4)13,
8(1)2,
8(1)4,
8(2)9,
9(1)1,
9(1)2,
10(2)7,
11(3)9,
11(4)14,
12(3)10,
12(4)14,
13(2)7
- extraction,
1(1)34,
2(3)295,
5(1)61,
5(2)121,
7(1)1,
8(4)16,
8(4)17,
10(3)14,
10(3)15,
11(1)2,
11(2)6,
12(3)11,
13(1)3
- factor,
10(2)10,
11(3)10,
12(4)14
- first,
5(2)165,
6(4)3,
7(1)1,
7(3)8,
7(3)10,
8(2)7,
8(3)10,
8(3)11,
8(4)19,
9(3)10,
9(3)11,
10(3)13,
11(2)6,
11(2)7,
11(3)8,
11(3)9,
11(3)11,
12(1)3,
12(1)4,
12(2)5,
12(3)10,
12(4)17,
13(2)7,
13(2)9,
13(3)13,
13(4)17
- followed,
4(4)417,
6(3)10,
8(4)19,
13(1)1
- gram,
3(2)113,
9(2)5,
9(2)7,
9(3)11,
11(2)7,
12(1)1,
12(4)15
- help,
5(2)146,
7(3)8,
8(4)17,
8(4)18,
9(1)3,
9(2)7,
10(2)10,
11(3)9,
12(1)3,
12(4)15
- hypothesis,
6(2)7,
8(2)6,
11(4)16
- identify,
5(2)146,
8(3)11,
9(3)12,
10(1)5,
11(2)6,
11(4)18,
12(3)10
- key,
7(1)2,
8(3)12,
9(3)11,
12(3)11,
13(4)18
- learning,
3(2)159,
5(1)61,
5(2)121,
5(4)413,
6(4)1,
6(4)3,
7(2)7,
7(3)9,
8(3)10,
8(4)15,
9(1)3,
9(2)5,
10(1)5,
10(2)10,
10(3)16,
10(4)20,
10(4)21,
11(1)3,
11(4)14,
11(4)16,
12(1)1,
12(1)3,
12(2)5,
12(2)7,
12(4)15,
13(1)2,
13(1)3,
13(2)9,
13(4)16,
13(4)17
- Li, Haizhou,
8(1)1
- machine,
4(1)18,
4(4)377,
5(2)89,
5(3)185,
6(4)2,
6(4)3,
7(1)1,
7(2)5,
7(2)7,
7(3)9,
7(3)10,
8(1)4,
8(2)5,
8(2)6,
8(2)7,
8(2)8,
8(2)9,
8(3)10,
8(4)15,
9(1)3,
9(4)13,
10(1)2,
10(1)5,
10(3)16,
10(4)18,
10(4)20,
11(1)2,
11(3)8,
11(4)14,
11(4)16,
12(3)9,
12(3)12,
12(4)14,
12(4)16,
12(4)17,
13(1)2,
13(3)11,
13(4)16,
13(4)17
- measure,
5(2)89,
6(4)3,
8(2)7,
9(2)7,
10(1)2,
10(1)6,
10(4)20,
11(2)6,
11(3)9,
11(3)11,
13(3)11,
13(3)13,
13(3)14
- modeling,
1(1)3,
1(3)173,
3(2)87,
3(3)169,
6(1)z,
6(3)9,
7(3)10,
7(4)13,
8(1)2,
8(1)4,
9(4)14,
10(4)18,
10(4)21,
11(2)5,
12(2)5,
13(3)12,
13(4)16
- most,
7(1)1,
7(3)8,
7(3)10,
8(4)15,
9(1)1,
9(2)5,
9(3)11,
10(1)5,
10(2)8,
12(1)1,
12(1)2,
13(1)1,
13(1)4,
13(2)6,
13(3)14,
13(4)18
- n,
9(2)7,
11(2)7,
12(1)1
- one,
5(2)89,
5(2)121,
6(3)9,
6(4)3,
7(3)8,
7(3)9,
7(4)11,
7(4)13,
8(2)9,
8(4)16,
8(4)17,
9(1)1,
9(2)5,
9(2)7,
9(3)12,
9(4)14,
10(1)5,
10(3)12,
10(3)13,
10(4)19,
11(2)4,
11(2)6,
11(2)7,
11(4)14,
12(1)1,
12(1)2,
12(2)5,
12(2)7,
12(3)11,
12(4)16,
13(1)4,
13(2)10,
13(4)17,
13(4)18
- pair,
5(2)89,
5(2)121,
6(3)11,
7(1)1,
7(3)8,
7(4)12,
8(1)3,
8(2)9,
8(4)17,
10(4)19,
10(4)21,
11(3)11,
11(4)13,
11(4)18,
12(3)9,
12(4)14,
13(3)11
- phonetic,
1(1)65,
2(1)63,
12(1)2
- probability,
5(2)121,
8(4)19,
10(1)6,
10(3)16,
11(2)5,
11(2)6,
12(3)9,
13(4)18
- probable,
13(3)14
- process,
5(2)121,
6(3)10,
7(1)1,
7(3)9,
10(2)10,
10(4)18,
12(3)9,
13(1)4,
13(2)9
- propose,
5(2)89,
6(2)8,
6(3)11,
7(3)8,
7(3)10,
7(4)12,
8(1)2,
8(1)4,
8(2)9,
8(4)19,
9(2)7,
9(4)13,
10(2)10,
10(3)12,
10(3)15,
10(4)17,
10(4)20,
11(2)7,
11(3)9,
11(3)11,
11(4)15,
11(4)16,
11(4)18,
12(1)1,
12(1)2,
12(1)3,
12(1)4,
12(2)5,
12(2)6,
12(3)9,
12(3)10,
12(3)12,
12(4)16,
13(1)2,
13(1)3,
13(2)8,
13(2)9,
13(3)12,
13(3)13,
13(4)17,
13(4)18
- recognition,
1(1)83,
1(4)297,
2(1)27,
2(3)290,
5(1)4,
5(2)165,
6(3)9,
6(4)3,
7(1)2,
7(3)10,
8(1)2,
8(3)11,
8(4)18,
9(1)2,
9(2)7,
10(1)6,
10(2)7,
10(2)9,
10(3)13,
11(1)1,
11(1)2,
11(4)13,
11(4)16,
11(4)17,
11(4)18,
12(1)4,
12(3)10,
13(3)12,
13(4)16
- recognized,
7(1)2,
10(2)7,
12(3)10
- result,
4(2)135,
5(2)121,
5(2)146,
5(2)165,
6(2)7,
6(3)9,
6(3)11,
6(4)3,
7(1)2,
7(2)5,
7(2)6,
7(2)7,
7(3)8,
7(3)10,
7(4)11,
7(4)12,
7(4)13,
8(1)2,
8(1)3,
8(1)4,
8(2)6,
8(2)9,
8(3)10,
8(3)12,
8(4)14,
8(4)15,
8(4)16,
8(4)17,
8(4)18,
8(4)19,
9(1)1,
9(1)2,
9(2)5,
9(2)6,
9(2)7,
9(3)11,
9(3)12,
9(4)14,
10(1)2,
10(2)7,
11(2)4,
11(2)5,
11(3)8,
11(3)9,
11(3)11,
11(4)13,
11(4)14,
11(4)15,
12(1)3,
12(1)4,
12(2)5,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(4)14,
12(4)16,
13(1)1,
13(1)4,
13(2)6,
13(2)7,
13(2)9,
13(3)11,
13(3)12,
13(3)14
- several,
6(2)7,
6(4)3,
7(2)5,
7(2)7,
7(3)10,
8(3)10,
8(4)16,
8(4)17,
8(4)18,
9(3)12,
11(2)6,
11(4)13,
11(4)16,
12(1)2,
13(3)12,
13(3)14
- similarity,
5(2)89,
5(2)165,
6(1)z-1,
7(3)8,
8(2)6,
8(2)9,
9(1)1,
10(2)10,
11(2)5,
11(2)6,
11(3)9,
11(3)11
- space,
6(2)7,
7(4)13,
11(2)5,
11(3)9
- statistics,
1(3)269,
5(4)413,
7(3)8
- step, two-,
7(3)10,
12(3)12
- study,
4(2)159,
4(3)243,
5(2)121,
5(2)146,
5(2)165,
5(3)209,
6(2)7,
8(1)3,
8(1)4,
8(4)16,
9(2)5,
9(2)6,
9(2)7,
9(3)11,
10(2)10,
10(3)12,
10(4)17,
10(4)18,
11(1)3,
11(2)6,
11(3)9,
11(3)11,
11(4)14,
13(1)3,
13(2)7,
13(3)11,
13(3)12,
13(3)14
- supervised,
8(1)3,
8(3)10,
9(1)2,
9(1)4,
11(2)4,
13(1)3,
13(2)9
- test,
7(2)5,
9(1)3,
9(3)10,
9(3)11,
11(4)13,
12(3)9,
13(1)4,
13(2)7,
13(3)13,
13(4)17
- then,
5(2)121,
7(1)1,
7(3)10,
7(4)12,
8(1)4,
8(2)7,
8(3)10,
8(3)11,
8(3)12,
8(4)14,
9(1)1,
9(2)7,
9(3)11,
10(2)7,
10(3)13,
10(4)20,
11(1)3,
11(2)7,
11(3)11,
11(4)15,
12(1)3,
12(3)10,
12(4)17,
13(1)4,
13(2)9,
13(3)13,
13(4)16
- thus,
7(4)11,
9(2)7,
10(2)9,
12(1)1,
12(4)15
- transliteration,
5(2)121,
5(3)185,
6(1)z-1,
9(1)1,
9(4)13,
9(4)14,
10(4)19,
12(3)9
- two-step,
7(3)10,
12(3)12
- unsupervised,
7(3)9,
8(1)3,
9(1)3,
9(1)4,
9(2)7,
10(2)7,
11(3)9,
13(3)12
- validate,
9(2)7
- Web,
1(2)159,
3(1)66,
7(3)8,
7(4)11,
8(3)12,
9(1)1,
9(4)15,
10(4)21,
11(2)4,
11(3)11,
11(4)16
- well,
5(2)121,
6(3)11,
7(3)8,
7(3)9,
7(3)10,
8(1)2,
8(4)18,
9(1)2,
9(3)12,
10(3)15,
11(1)2,
11(2)4,
11(3)11,
12(3)9,
13(1)2
- which,
5(2)89,
5(2)121,
6(2)8,
6(3)10,
6(4)1,
7(1)3,
7(2)7,
7(3)10,
7(4)13,
8(1)2,
8(1)4,
8(2)9,
8(3)10,
8(3)12,
8(4)14,
8(4)16,
8(4)18,
9(1)1,
9(2)5,
9(3)12,
9(4)14,
9(4)15,
10(1)4,
10(1)6,
10(2)7,
10(2)8,
10(2)9,
10(3)12,
10(3)13,
10(3)14,
10(3)15,
10(4)17,
10(4)19,
10(4)20,
11(2)6,
11(3)8,
11(3)9,
11(4)13,
11(4)14,
11(4)15,
11(4)16,
11(4)18,
12(1)1,
12(1)3,
12(1)4,
12(2)5,
12(2)6,
12(2)7,
12(3)10,
12(3)11,
12(4)15,
12(4)17,
13(1)1,
13(2)8,
13(3)13,
13(4)17,
13(4)18
- work,
5(2)121,
6(3)11,
6(4)2,
7(2)7,
7(3)9,
8(4)19,
9(2)5,
9(4)15,
10(1)4,
10(2)10,
12(1)3,
12(3)9,
13(1)1,
13(2)9,
13(3)14,
13(4)18