Entry Guo:2010:LIS from talip.bib
Last update: Sun Oct 15 02:55:04 MDT 2017
Top |
Symbols |
Numbers |
Math |
A |
B |
C |
D |
E |
F |
G |
H |
I |
J |
K |
L |
M |
N |
O |
P |
Q |
R |
S |
T |
U |
V |
W |
X |
Y |
Z
BibTeX entry
@Article{Guo:2010:LIS,
author = "Yuqing Guo and Haifeng Wang and Josef van Genabith",
title = "A Linguistically Inspired Statistical Model for
{Chinese} Punctuation Generation",
journal = j-TALIP,
volume = "9",
number = "2",
pages = "6:1--6:??",
month = jun,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1781134.1781136",
ISSN = "1530-0226 (print), 1558-3430 (electronic)",
ISSN-L = "1530-0226",
bibdate = "Mon Jun 21 18:03:02 MDT 2010",
bibsource = "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/talip.bib",
abstract = "This article investigates a relatively underdeveloped
subject in natural language processing---the generation
of punctuation marks. From a theoretical perspective,
we study 16 Chinese punctuation marks as defined in the
Chinese national standard of punctuation usage, and
categorize these punctuation marks into three different
types according to their syntactic properties. We
implement a three-tier maximum entropy model
incorporating linguistically-motivated features for
generating the commonly used Chinese punctuation marks
in unpunctuated sentences output by a surface realizer.
Furthermore, we present a method to automatically
extract cue words indicating sentence-final punctuation
marks as a specialized feature to construct a more
precise model. Evaluating on the Penn Chinese Treebank
data, the MaxEnt model achieves an {\em f\/}-score of
79.83\% for punctuation insertion and 74.61\% for
punctuation restoration using gold data input, 79.50\%
for insertion and 73.32\% for restoration using
parser-based imperfect input. The experiments show that
the MaxEnt model significantly outperforms a baseline
5-gram language model that scores 54.99\% for
punctuation insertion and 52.01\% for restoration. We
show that our results are not far from human
performance on the same task with human insertion {\em
f\/}-scores in the range of 81--87\% and human
restoration in the range of 71--82\%. Finally, a manual
error analysis of the generation output shows that
close to 40\% of the mismatched punctuation marks do in
fact result in acceptable choices, a fact obscured in
the automatic string-matching based evaluation
scores.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Asian Language Information
Processing",
journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J820",
keywords = "Chinese punctuation marks; maximum entropy model;
sentence realization",
}
Related entries
- acceptable,
11(3)8,
12(2)6
- according,
5(2)146,
5(2)165,
6(2)7,
7(2)7,
7(4)11,
8(1)3,
8(2)7,
10(1)3,
10(4)19,
12(4)17,
13(2)9
- achieve,
7(3)9,
7(3)10,
7(4)12,
7(4)13,
8(1)4,
8(2)7,
8(3)10,
8(4)17,
9(4)14,
10(2)7,
11(3)8,
11(3)11,
11(4)15,
11(4)17,
12(1)3,
12(1)4,
12(2)7,
12(3)9,
12(3)11,
13(1)3,
13(1)4,
13(2)9
- analysis,
2(4)301,
3(2)94,
3(3)169,
4(3)263,
7(1)1,
7(1)3,
7(2)5,
7(3)9,
8(1)2,
8(3)12,
8(4)19,
9(2)7,
9(3)11,
9(4)15,
10(1)4,
10(3)16,
10(4)20,
11(2)4,
11(2)7,
11(4)16,
11(4)18,
12(2)6,
13(2)9,
13(3)11
- article,
3(4)227,
4(3)321,
5(2)121,
6(2)6,
6(2)7,
6(2)8,
6(4)3,
7(1)1,
7(1)3,
7(2)5,
7(2)6,
7(2)7,
7(3)8,
7(3)9,
7(4)11,
7(4)12,
7(4)13,
8(1)2,
8(1)3,
8(1)4,
8(2)6,
8(2)8,
8(2)9,
8(3)10,
8(3)11,
8(3)12,
8(4)14,
8(4)16,
8(4)17,
8(4)18,
9(1)2,
9(1)4,
9(3)10,
9(3)11,
9(3)12,
9(4)13,
9(4)14,
10(1)3,
10(1)5,
10(1)6,
10(2)7,
10(2)9,
10(2)10,
10(3)12,
10(3)13,
10(3)14,
10(3)15,
10(4)17,
10(4)18,
10(4)20,
10(4)21,
11(1)1,
11(2)4,
11(2)5,
11(2)7,
11(3)8,
11(3)10,
11(3)11,
11(4)13,
11(4)14,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
12(1)1,
12(1)3,
12(1)4,
12(2)5,
12(2)6,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(3)12,
12(4)14,
13(1)1,
13(1)2,
13(1)3,
13(1)4,
13(2)6,
13(2)7,
13(2)8,
13(2)9,
13(3)12,
13(3)13,
13(4)16
- automatic,
1(3)207,
3(1)51,
5(2)165,
6(2)6,
7(1)2,
8(4)15,
8(4)16,
8(4)18,
9(1)2,
9(2)7,
10(1)5,
10(2)7,
10(3)12,
10(3)13,
10(4)21,
12(2)7,
12(4)14,
12(4)15,
13(3)13
- automatically,
3(4)227,
5(2)89,
5(2)121,
5(2)165,
7(1)1,
7(2)6,
8(1)3,
8(2)7,
8(3)10,
10(3)12,
11(2)6,
11(4)16,
12(2)7,
12(4)15,
13(3)12,
13(4)16
- based,
1(3)269,
2(4)301,
3(2)113,
4(3)357,
4(4)377,
5(2)121,
5(2)165,
5(3)185,
6(2)8,
6(3)9,
6(3)11,
6(4)3,
7(2)6,
7(3)10,
7(4)12,
8(1)4,
8(3)11,
8(3)12,
8(4)17,
8(4)19,
9(1)2,
9(2)5,
9(2)7,
9(3)11,
10(1)4,
10(1)6,
10(2)7,
10(3)14,
10(3)16,
11(2)6,
11(3)8,
11(3)11,
11(4)14,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
12(1)1,
12(1)2,
12(1)4,
12(3)9,
12(3)10,
12(3)11,
13(1)2,
13(1)3,
13(1)4,
13(2)10,
13(3)13,
13(4)17,
13(4)18
- baseline,
7(1)1,
7(2)5,
7(2)6,
7(3)8,
8(1)4,
8(3)10,
9(1)3,
9(1)4,
9(2)7,
9(3)12,
11(4)13,
11(4)14
- choice,
11(4)13,
12(4)14,
12(4)15,
13(4)17
- construct,
9(1)1,
10(2)7,
10(2)9,
12(3)11
- data,
2(2)143,
6(1)z,
6(1)z-1,
6(2)7,
6(3)11,
7(1)3,
7(3)9,
7(4)13,
8(1)3,
8(2)7,
8(3)10,
8(3)11,
8(3)12,
8(4)16,
8(4)18,
10(2)7,
10(3)12,
10(4)20,
11(2)4,
11(3)10,
11(3)11,
11(4)13,
11(4)14,
11(4)18,
12(1)1,
12(2)7,
12(3)9,
13(1)2,
13(1)3,
13(1)4,
13(4)16,
13(4)17,
13(4)18
- defined,
5(2)165,
6(3)10,
7(3)8
- different,
5(2)89,
6(3)9,
6(4)3,
7(2)7,
7(3)8,
7(4)13,
8(1)2,
8(2)7,
8(2)8,
8(3)11,
8(4)16,
8(4)17,
9(1)1,
9(1)4,
9(2)5,
9(3)12,
10(1)4,
10(1)5,
10(3)12,
10(4)17,
10(4)19,
11(3)8,
11(3)11,
11(4)16,
11(4)17,
11(4)18,
12(1)2,
12(3)11,
12(4)17,
13(2)6,
13(3)11,
13(4)16
- entropy,
6(2)7,
12(4)14
- error,
4(1)18,
6(3)9,
7(1)2,
7(3)10,
9(1)2,
10(1)2,
10(1)5,
10(1)6,
10(2)7,
10(2)10,
11(1)3,
11(2)7,
11(4)18,
12(1)2,
13(2)8,
13(3)14
- Evaluating,
11(4)13
- evaluation,
3(4)243,
5(2)165,
6(1)z-2,
7(2)7,
7(4)13,
8(2)6,
8(3)12,
8(4)15,
8(4)16,
9(1)3,
9(3)10,
9(3)11,
9(3)12,
9(4)14,
10(3)15,
11(2)5,
11(4)13,
11(4)18,
12(2)6,
12(4)15,
13(1)1
- experiment,
2(2)101,
2(2)143,
5(2)146,
5(2)165,
5(3)245,
6(2)7,
6(2)8,
7(1)1,
7(1)2,
7(1)3,
7(2)5,
7(4)11,
8(1)2,
8(2)6,
8(3)11,
8(4)17,
9(2)7,
9(3)11,
9(3)12,
10(4)20,
11(2)4,
11(2)5,
11(2)7,
11(3)10,
11(3)11,
11(4)15,
11(4)17,
12(1)2,
12(2)5,
12(2)7,
12(3)9,
12(3)11,
12(3)12,
12(4)16,
12(4)17,
13(1)2,
13(1)3,
13(2)6,
13(2)8,
13(3)13,
13(4)18
- extract,
6(3)11,
7(3)8,
7(3)10,
9(1)1,
11(1)2,
11(2)4,
11(2)6,
11(3)11,
11(4)15,
12(2)7,
12(3)9
- fact,
7(2)7,
10(3)14,
13(2)8
- far,
8(2)8,
13(1)1
- feature,
2(3)290,
5(2)165,
6(4)1,
7(2)6,
7(2)7,
7(3)10,
7(4)13,
8(3)11,
8(4)14,
8(4)17,
9(1)2,
9(2)5,
10(1)5,
10(1)6,
10(2)7,
10(3)13,
10(3)15,
10(4)17,
10(4)19,
10(4)21,
11(3)10,
11(4)14,
11(4)16,
12(1)1,
12(1)4,
12(3)9,
12(3)10,
13(2)9,
13(3)13,
13(4)16
- finally,
5(2)165,
7(3)8,
8(1)4,
10(3)13,
11(2)5
- Furthermore,
6(2)7,
7(1)1,
7(4)11,
8(4)17,
11(2)6,
12(1)2,
12(2)7,
12(4)16,
13(1)1
- generating,
5(2)165,
7(3)8,
8(1)2,
8(4)19,
12(4)15
- generation,
2(3)270,
5(2)165,
7(1)3,
8(1)4,
12(2)5,
12(3)9
- gold,
11(4)18
- human,
7(3)8,
8(1)2,
10(1)2,
10(2)10,
12(1)1,
12(4)15,
13(2)10
- imperfect,
11(1)3
- implement,
7(3)9,
7(4)12,
8(4)15,
9(3)11
- incorporating,
5(2)121,
6(4)1,
7(2)5,
8(3)10,
8(4)15,
11(2)4
- indicating,
10(3)15
- input,
5(2)165,
8(2)7,
9(1)1,
9(1)3,
10(1)6,
10(4)20,
12(1)2,
12(1)4,
12(2)6,
13(1)4,
13(2)8,
13(3)12,
13(4)17
- investigate,
8(1)3,
8(2)8,
8(4)16,
8(4)17,
9(1)4,
11(4)14,
12(4)14,
13(2)9
- linguistically,
11(3)10
- manual,
8(1)3,
13(1)4
- maximum,
6(2)7,
8(2)6,
12(1)1,
12(3)9
- more,
5(2)146,
6(2)7,
6(3)10,
7(3)9,
7(4)13,
8(1)4,
8(2)7,
8(3)12,
8(4)14,
8(4)16,
9(1)2,
9(3)11,
9(3)12,
10(1)4,
10(4)19,
11(2)4,
11(2)7,
12(1)1,
12(1)2,
12(3)9,
13(1)1,
13(1)3,
13(1)4,
13(2)8,
13(4)18
- national,
6(3)10,
7(3)9,
11(4)13,
12(1)3,
13(2)8
- natural,
1(2)123,
3(1)11,
5(2)121,
5(4)291,
6(2)7,
7(1)1,
7(4)13,
8(1)2,
8(2)9,
8(4)13,
8(4)14,
8(4)16,
8(4)19,
9(3)11,
9(4)15,
10(3)14,
10(4)20,
11(1)2,
11(4)14,
11(4)15,
12(1)3,
13(3)14
- not,
5(2)89,
5(2)121,
6(3)10,
7(2)7,
7(3)8,
7(3)10,
7(4)11,
7(4)12,
7(4)13,
8(1)3,
8(2)7,
8(2)8,
8(2)9,
8(4)17,
9(2)7,
9(4)14,
10(1)4,
10(1)5,
10(2)8,
10(2)10,
10(4)18,
11(1)2,
11(3)10,
12(1)2,
12(1)3,
12(2)7,
12(3)9,
13(2)7,
13(2)9,
13(4)17,
13(4)18
- outperform,
6(4)3,
7(2)6,
7(3)10,
7(4)13,
8(2)6,
9(2)5,
10(3)15,
11(2)7,
11(3)8,
11(4)14,
12(2)5,
13(3)13,
13(4)17
- output,
7(1)2,
8(1)4,
8(2)6,
8(3)10,
9(1)2,
10(1)6,
10(2)9,
10(4)18,
13(2)7
- Penn,
10(1)5
- performance,
5(2)121,
5(2)165,
6(2)8,
6(3)9,
6(4)1,
6(4)3,
7(1)1,
7(1)2,
7(2)5,
7(2)6,
7(2)7,
7(3)9,
7(3)10,
7(4)13,
8(1)2,
8(1)3,
8(2)7,
8(2)8,
8(2)9,
8(3)10,
8(4)16,
8(4)17,
8(4)18,
9(1)2,
9(1)4,
9(2)5,
9(3)11,
9(3)12,
9(4)14,
10(2)8,
10(3)13,
10(3)14,
11(2)7,
11(3)10,
11(3)11,
11(4)14,
11(4)15,
11(4)17,
12(1)2,
12(3)9,
12(3)11,
12(4)14,
12(4)15,
12(4)16,
13(1)3,
13(1)4,
13(2)7,
13(2)9,
13(4)16,
13(4)17
- perspective,
9(3)11,
10(4)20,
12(4)13,
13(1)5,
13(2)10
- precise,
7(1)1
- present,
5(2)89,
5(2)165,
6(2)7,
6(3)10,
6(4)2,
7(1)2,
7(1)3,
7(2)7,
7(3)9,
7(4)11,
7(4)13,
8(1)3,
8(2)6,
8(2)7,
8(2)8,
8(3)10,
8(4)14,
8(4)16,
8(4)17,
8(4)18,
8(4)19,
9(1)1,
9(1)2,
9(1)3,
9(4)14,
10(1)4,
10(1)6,
10(2)7,
10(3)14,
10(4)18,
10(4)19,
11(1)2,
11(1)3,
11(2)4,
11(2)5,
11(2)6,
11(3)10,
11(4)13,
12(1)3,
12(2)5,
12(3)9,
12(3)11,
12(4)15,
13(2)8,
13(4)16
- processing,
3(1)1,
3(4)213,
4(4)375,
5(2)121,
5(4)291,
6(2)7,
6(3)9,
6(3)10,
6(4)3,
7(1)1,
7(1)3,
7(2)7,
7(4)13,
8(1)2,
8(2)9,
8(4)13,
8(4)14,
8(4)16,
8(4)18,
8(4)19,
9(2)5,
9(3)11,
9(4)15,
10(3)11,
10(3)14,
10(4)20,
11(1)2,
11(3)9,
11(4)15,
12(1)3,
12(3)10,
13(1)1,
13(2)7
- property,
6(2)7,
8(3)11,
8(4)14,
11(3)9
- punctuation,
8(4)16,
13(2)9
- range,
7(4)11,
7(4)13
- relatively,
8(2)8,
8(3)10,
8(4)17,
10(3)14
- result,
4(2)135,
5(2)121,
5(2)146,
5(2)165,
6(2)6,
6(2)7,
6(3)9,
6(3)11,
6(4)3,
7(1)2,
7(2)5,
7(2)6,
7(2)7,
7(3)8,
7(3)10,
7(4)11,
7(4)12,
7(4)13,
8(1)2,
8(1)3,
8(1)4,
8(2)6,
8(2)9,
8(3)10,
8(3)12,
8(4)14,
8(4)15,
8(4)16,
8(4)17,
8(4)18,
8(4)19,
9(1)1,
9(1)2,
9(2)5,
9(2)7,
9(3)11,
9(3)12,
9(4)14,
10(1)2,
10(2)7,
11(2)4,
11(2)5,
11(3)8,
11(3)9,
11(3)11,
11(4)13,
11(4)14,
11(4)15,
12(1)3,
12(1)4,
12(2)5,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(4)14,
12(4)16,
13(1)1,
13(1)4,
13(2)6,
13(2)7,
13(2)9,
13(3)11,
13(3)12,
13(3)14
- same,
7(4)11,
9(1)3,
10(2)10,
11(2)6,
12(4)16,
13(2)6
- score,
7(2)7,
7(4)12,
8(2)7,
8(3)10,
9(1)3,
10(4)18,
11(2)6,
12(3)9,
12(4)17,
13(1)2,
13(3)13,
13(4)16
- sentence,
1(3)173,
3(2)146,
4(3)321,
4(4)377,
5(2)121,
5(2)146,
5(2)165,
7(1)3,
7(2)6,
7(4)13,
8(1)3,
8(2)8,
9(1)2,
10(4)21,
11(1)3,
11(2)5,
11(2)6,
11(3)8,
11(3)10,
11(3)11,
12(1)2,
12(1)3,
12(2)7,
12(4)14,
12(4)17,
13(1)2,
13(3)11,
13(4)17
- show,
5(2)89,
5(2)146,
7(1)1,
7(1)2,
7(1)3,
7(4)11,
7(4)12,
7(4)13,
8(1)4,
8(2)7,
8(2)9,
8(3)12,
8(4)16,
8(4)17,
9(1)1,
9(1)2,
9(1)3,
9(2)5,
9(2)7,
9(3)11,
9(3)12,
9(4)14,
10(1)3,
10(3)15,
11(2)4,
11(2)5,
11(2)7,
11(3)8,
11(3)11,
11(4)14,
11(4)15,
11(4)17,
11(4)18,
12(1)2,
12(1)4,
12(2)5,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(4)15,
12(4)16,
13(1)3,
13(2)6,
13(2)7,
13(2)9,
13(3)14
- significantly,
7(1)1,
7(4)13,
8(2)6,
8(4)15,
8(4)18,
9(1)2,
9(3)12,
11(2)6,
11(4)13,
11(4)17,
12(4)16,
13(2)6,
13(4)17
- standard,
6(3)10,
7(4)11,
8(4)14,
8(4)15,
8(4)18,
9(1)1,
9(2)5,
10(3)12,
11(2)5,
11(4)18,
13(2)7,
13(3)13,
13(4)17
- statistical,
1(1)3,
3(2)87,
3(4)243,
5(2)121,
5(4)323,
5(4)360,
6(1)z-4,
7(1)1,
8(1)2,
8(1)4,
8(2)6,
8(2)7,
8(2)8,
8(2)9,
8(3)10,
8(4)15,
8(4)19,
9(2)7,
9(3)11,
10(4)18,
11(2)6,
11(2)7,
11(3)8,
11(4)15,
12(1)1,
12(3)12,
12(4)14,
12(4)16,
12(4)17,
13(1)2,
13(1)3,
13(1)4,
13(4)17
- study,
4(2)159,
4(3)243,
5(2)121,
5(2)146,
5(2)165,
5(3)209,
6(2)6,
6(2)7,
8(1)3,
8(1)4,
8(4)16,
9(2)5,
9(2)7,
9(3)11,
10(2)10,
10(3)12,
10(4)17,
10(4)18,
11(1)3,
11(2)6,
11(3)9,
11(3)11,
11(4)14,
13(1)3,
13(2)7,
13(3)11,
13(3)12,
13(3)14
- subject,
5(2)146,
11(4)13
- surface,
7(4)12,
8(2)6,
9(1)3,
11(4)15,
12(3)11
- syntactic,
6(4)1,
7(2)6,
7(4)12,
7(4)13,
8(1)4,
8(4)15,
10(3)12,
10(3)15,
10(4)17,
11(3)8,
11(4)14,
11(4)15,
11(4)18,
12(4)17,
13(2)9
- task,
2(1)49,
5(2)89,
5(2)121,
6(2)7,
6(3)11,
6(4)1,
6(4)3,
7(1)1,
7(1)2,
7(2)7,
7(3)10,
7(4)13,
8(1)4,
8(2)7,
8(4)15,
8(4)16,
9(1)4,
9(3)10,
9(4)14,
9(4)15,
10(1)5,
10(3)14,
10(4)18,
10(4)20,
10(4)21,
11(1)2,
11(3)8,
11(3)11,
11(4)13,
11(4)14,
11(4)17,
11(4)18,
12(1)2,
12(1)3,
12(2)5,
12(2)7,
12(3)9,
12(4)17,
13(2)10,
13(4)17
- theoretical,
6(3)10
- three,
1(2)145,
5(2)165,
7(2)7,
7(3)8,
8(1)4,
8(3)12,
9(2)5,
9(3)11,
10(1)5,
10(4)18,
11(4)17,
13(3)11,
13(4)16
- treebank,
8(3)10,
8(4)16,
10(1)5,
10(3)12,
10(4)18,
11(3)9,
13(2)9
- type,
7(4)11,
8(4)17,
9(3)11,
9(3)12,
10(4)19,
11(1)2,
11(1)3,
11(4)18,
12(3)10,
12(3)11,
13(1)3,
13(2)6,
13(3)11
- usage,
12(4)14,
12(4)15
- used,
5(2)89,
5(2)146,
7(1)3,
7(2)6,
7(2)7,
7(3)9,
7(4)12,
7(4)13,
8(3)10,
8(4)17,
9(1)1,
9(1)3,
9(3)10,
10(1)2,
10(1)6,
10(2)7,
10(2)8,
10(3)12,
10(3)13,
10(4)20,
11(1)2,
11(1)3,
11(3)10,
11(4)13,
11(4)14,
12(2)5,
12(3)9,
12(3)11,
12(3)12,
13(2)6,
13(3)11