Entry Sharma:2008:AMI from talip.bib
Last update: Sun Oct 15 02:55:04 MDT 2017
Top |
Symbols |
Numbers |
Math |
A |
B |
C |
D |
E |
F |
G |
H |
I |
J |
K |
L |
M |
N |
O |
P |
Q |
R |
S |
T |
U |
V |
W |
X |
Y |
Z
BibTeX entry
@Article{Sharma:2008:AMI,
author = "Utpal Sharma and Jugal K. Kalita and Rajib K. Das",
title = "Acquisition of Morphology of an {Indic} Language from
Text Corpus",
journal = j-TALIP,
volume = "7",
number = "3",
pages = "9:1--9:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386869.1386871",
ISSN = "1530-0226 (print), 1558-3430 (electronic)",
ISSN-L = "1530-0226",
bibdate = "Fri Aug 22 13:11:51 MDT 2008",
bibsource = "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/talip.bib",
abstract = "This article describes an approach to unsupervised
learning of morphology from an unannotated corpus for a
highly inflectional Indo-European language called
Assamese spoken by about 30 million people. Although
Assamese is one of India's national languages, it
utterly lacks computational linguistic resources. There
exists no prior computational work on this language
spoken widely in northeast India. The work presented is
pioneering in this respect. In this article, we discuss
salient issues in Assamese morphology where the
presence of a large number of suffixal determiners,
sandhi, samas, and the propensity to use suffix
sequences make approximately 50\% of the words used in
written and spoken text inflected. We implement methods
proposed by Gaussier and Goldsmith on acquisition of
morphological knowledge, and obtain F-measure
performance below 60\%. This motivates us to present a
method more suitable for handling suffix sequences,
enabling us to increase the F-measure performance of
morphology acquisition to almost 70\%. We describe how
we build a morphological dictionary for Assamese from
the text corpus. Using the morphological knowledge
acquired and the morphological dictionary, we are able
to process small chunks of data at a time as well as a
large corpus. We achieve approximately 85\% precision
and recall during the analysis of small chunks of
coherent text.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Asian Language Information
Processing",
journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J820",
keywords = "Assamese; Indo-European languages; machine learning;
morphology",
}
Related entries
- able,
7(4)13,
8(3)12,
8(4)18,
11(2)4,
11(2)5
- achieve,
7(3)10,
7(4)12,
7(4)13,
8(1)4,
8(2)7,
8(3)10,
8(4)17,
9(2)6,
9(4)14,
10(2)7,
11(3)8,
11(3)11,
11(4)15,
11(4)17,
12(1)3,
12(1)4,
12(2)7,
12(3)9,
12(3)11,
13(1)3,
13(1)4,
13(2)9
- acquired,
7(2)6,
11(4)16
- almost,
6(2)6,
9(3)11,
9(3)12,
11(1)1,
13(1)1
- Although,
5(2)121,
10(2)8,
12(2)5,
13(1)3
- analysis,
2(4)301,
3(2)94,
3(3)169,
4(3)263,
7(1)1,
7(1)3,
7(2)5,
8(1)2,
8(3)12,
8(4)19,
9(2)6,
9(2)7,
9(3)11,
9(4)15,
10(1)4,
10(3)16,
10(4)20,
11(2)4,
11(2)7,
11(4)16,
11(4)18,
12(2)6,
13(2)9,
13(3)11
- approximately,
10(1)2
- article,
3(4)227,
4(3)321,
5(2)121,
6(2)6,
6(2)7,
6(2)8,
6(4)3,
7(1)1,
7(1)3,
7(2)5,
7(2)6,
7(2)7,
7(3)8,
7(4)11,
7(4)12,
7(4)13,
8(1)2,
8(1)3,
8(1)4,
8(2)6,
8(2)8,
8(2)9,
8(3)10,
8(3)11,
8(3)12,
8(4)14,
8(4)16,
8(4)17,
8(4)18,
9(1)2,
9(1)4,
9(2)6,
9(3)10,
9(3)11,
9(3)12,
9(4)13,
9(4)14,
10(1)3,
10(1)5,
10(1)6,
10(2)7,
10(2)9,
10(2)10,
10(3)12,
10(3)13,
10(3)14,
10(3)15,
10(4)17,
10(4)18,
10(4)20,
10(4)21,
11(1)1,
11(2)4,
11(2)5,
11(2)7,
11(3)8,
11(3)10,
11(3)11,
11(4)13,
11(4)14,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
12(1)1,
12(1)3,
12(1)4,
12(2)5,
12(2)6,
12(2)7,
12(3)9,
12(3)10,
12(3)11,
12(3)12,
12(4)14,
13(1)1,
13(1)2,
13(1)3,
13(1)4,
13(2)6,
13(2)7,
13(2)8,
13(2)9,
13(3)12,
13(3)13,
13(4)16
- Assamese,
11(1)1,
13(3)14
- build,
8(2)9,
8(3)12,
13(1)3,
13(1)4
- called,
6(2)8,
7(3)10,
8(3)12,
12(1)1,
12(3)12,
13(1)2,
13(4)18
- chunk,
7(1)1,
12(1)2,
12(3)10,
13(2)9
- computational,
9(1)3,
10(1)1,
12(2)5,
13(2)10
- corpus,
4(1)18,
4(4)400,
5(2)165,
6(3)9,
6(3)11,
7(2)6,
7(4)13,
8(4)17,
9(2)5,
9(2)7,
9(4)14,
10(2)7,
11(2)5,
11(3)9,
11(3)10,
11(3)11,
11(4)16,
12(1)1,
12(1)2,
12(1)3,
13(1)3
- data,
2(2)143,
6(1)z,
6(1)z-1,
6(2)7,
6(3)11,
7(1)3,
7(4)13,
8(1)3,
8(2)7,
8(3)10,
8(3)11,
8(3)12,
8(4)16,
8(4)18,
9(2)6,
10(2)7,
10(3)12,
10(4)20,
11(2)4,
11(3)10,
11(3)11,
11(4)13,
11(4)14,
11(4)18,
12(1)1,
12(2)7,
12(3)9,
13(1)2,
13(1)3,
13(1)4,
13(4)16,
13(4)17,
13(4)18
- describe,
6(3)9,
6(4)3,
7(2)5,
7(2)6,
7(2)7,
7(4)12,
8(4)14,
9(3)10,
9(3)11,
10(1)5,
11(2)6,
12(1)3,
12(2)5,
13(1)1,
13(1)4,
13(2)9
- dictionary,
1(4)281,
5(2)121,
6(3)11,
9(1)4,
10(1)3,
10(2)7,
11(2)6,
11(4)16,
12(2)7,
13(3)14
- discuss,
6(3)10,
9(3)10,
11(1)1
- during,
9(2)7,
11(1)1,
12(4)15,
13(3)14
- enabling,
12(3)9,
13(4)18
- European, Indo-,
9(3)11,
11(2)6,
12(4)14,
13(2)7
- exist,
5(2)89,
9(4)13,
12(4)16
- handling,
5(2)121,
7(1)2,
10(4)17,
12(3)9
- highly,
5(2)121,
6(3)9,
6(4)2,
8(4)18,
10(2)8,
10(4)20,
11(2)4,
12(2)5,
13(1)4,
13(4)17
- how,
5(2)146,
6(4)3,
7(1)2,
7(4)12,
8(4)14,
8(4)16,
9(2)5,
9(3)12,
11(3)8,
12(4)14,
13(2)9,
13(2)10
- implement,
7(4)12,
8(4)15,
9(2)6,
9(3)11
- increase,
7(2)5,
8(2)7,
8(4)15,
9(3)12,
9(4)15,
10(2)8,
11(2)6,
11(4)18,
12(2)6,
12(3)11
- India,
11(1)1,
13(2)8,
13(3)14
- Indic,
9(3)11,
12(2)6
- Indo-European,
9(3)11,
11(2)6,
12(4)14,
13(2)7
- inflected,
8(4)18
- inflectional,
6(3)9,
9(3)11,
10(1)4,
10(2)8
- issue,
3(1)1,
3(2)87,
3(4)213,
4(3)237,
4(4)375,
5(4)291,
6(2)7,
6(3)10,
7(1)2,
7(1)3,
7(2)4,
7(4)11,
8(1)1,
8(2)5,
8(2)6,
8(3)12,
8(4)13,
9(3)9,
10(1)1,
10(3)11,
10(3)13,
10(4)18,
11(1)2,
11(2)6,
11(4)12
- knowledge,
4(4)435,
5(1)4,
5(1)74,
5(2)121,
5(2)146,
7(4)12,
8(1)4,
9(1)3,
11(1)3,
11(2)4,
11(4)13,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
13(4)16
- lack,
8(4)14,
9(1)1,
12(2)6,
12(2)7
- large,
6(2)8,
6(3)9,
6(3)11,
7(3)8,
7(4)11,
8(1)2,
8(3)12,
8(4)18,
9(4)14,
10(2)7,
10(4)20,
10(4)21,
11(1)2,
11(3)8,
12(2)6,
12(3)9,
13(1)3,
13(2)7,
13(2)8,
13(3)13,
13(4)17,
13(4)18
- learning,
3(2)159,
5(1)61,
5(2)121,
5(4)413,
6(2)6,
6(4)1,
6(4)3,
7(2)7,
8(3)10,
8(4)15,
9(1)3,
9(2)5,
10(1)5,
10(2)10,
10(3)16,
10(4)20,
10(4)21,
11(1)3,
11(4)14,
11(4)16,
12(1)1,
12(1)3,
12(2)5,
12(2)7,
12(4)15,
13(1)2,
13(1)3,
13(2)9,
13(4)16,
13(4)17
- linguistic,
2(2)101,
6(3)10,
7(4)11,
7(4)13,
8(4)15,
8(4)17,
9(1)2,
9(1)3,
9(2)5,
10(1)1,
10(2)8,
11(3)10,
11(4)16,
11(4)18,
12(1)2,
12(3)11,
13(3)11
- machine,
4(1)18,
4(4)377,
5(2)89,
5(3)185,
6(2)6,
6(4)2,
6(4)3,
7(1)1,
7(2)5,
7(2)7,
7(3)10,
8(1)4,
8(2)5,
8(2)6,
8(2)7,
8(2)8,
8(2)9,
8(3)10,
8(4)15,
9(1)3,
9(4)13,
10(1)2,
10(1)5,
10(3)16,
10(4)18,
10(4)20,
11(1)2,
11(3)8,
11(4)14,
11(4)16,
12(3)9,
12(3)12,
12(4)14,
12(4)16,
12(4)17,
13(1)2,
13(3)11,
13(4)16,
13(4)17
- make,
6(4)2,
8(2)9,
9(3)11,
11(2)6,
11(4)15,
12(4)14,
12(4)15,
13(1)1,
13(1)3,
13(2)8
- million,
6(3)10,
11(1)1
- more,
5(2)146,
6(2)7,
6(3)10,
7(4)13,
8(1)4,
8(2)7,
8(3)12,
8(4)14,
8(4)16,
9(1)2,
9(2)6,
9(3)11,
9(3)12,
10(1)4,
10(4)19,
11(2)4,
11(2)7,
12(1)1,
12(1)2,
12(3)9,
13(1)1,
13(1)3,
13(1)4,
13(2)8,
13(4)18
- morphological,
6(4)2,
8(4)16,
9(1)3,
9(4)15,
10(1)4,
11(3)9,
13(2)9,
13(3)14
- morphology,
8(4)14,
8(4)16,
9(3)11,
10(1)4
- motivate,
10(1)3
- national,
6(3)10,
9(2)6,
11(4)13,
12(1)3,
13(2)8
- number,
6(2)8,
6(3)9,
6(3)11,
7(1)3,
7(4)11,
8(1)3,
8(4)18,
9(3)12,
10(1)5,
10(2)8,
10(4)21,
11(1)2,
11(3)8,
11(3)9,
12(1)2,
12(2)7,
12(3)9,
13(2)7,
13(3)12,
13(3)13
- obtain,
7(2)7,
7(3)8,
8(4)15,
9(2)5,
9(3)12,
10(3)12,
11(3)8,
12(3)10,
12(4)17,
13(3)14
- one,
5(2)89,
5(2)121,
6(2)6,
6(3)9,
6(4)3,
7(3)8,
7(4)11,
7(4)13,
8(2)9,
8(4)16,
8(4)17,
9(1)1,
9(2)5,
9(2)7,
9(3)12,
9(4)14,
10(1)5,
10(3)12,
10(3)13,
10(4)19,
11(2)4,
11(2)6,
11(2)7,
11(4)14,
12(1)1,
12(1)2,
12(2)5,
12(2)7,
12(3)11,
12(4)16,
13(1)4,
13(2)10,
13(4)17,
13(4)18
- people,
5(2)146,
6(3)10,
11(1)1,
11(1)2,
12(1)3
- performance,
5(2)121,
5(2)165,
6(2)8,
6(3)9,
6(4)1,
6(4)3,
7(1)1,
7(1)2,
7(2)5,
7(2)6,
7(2)7,
7(3)10,
7(4)13,
8(1)2,
8(1)3,
8(2)7,
8(2)8,
8(2)9,
8(3)10,
8(4)16,
8(4)17,
8(4)18,
9(1)2,
9(1)4,
9(2)5,
9(2)6,
9(3)11,
9(3)12,
9(4)14,
10(2)8,
10(3)13,
10(3)14,
11(2)7,
11(3)10,
11(3)11,
11(4)14,
11(4)15,
11(4)17,
12(1)2,
12(3)9,
12(3)11,
12(4)14,
12(4)15,
12(4)16,
13(1)3,
13(1)4,
13(2)7,
13(2)9,
13(4)16,
13(4)17
- pioneering,
8(4)14
- precision,
9(3)11,
10(2)8,
11(3)11,
11(4)18,
12(1)1,
13(2)6
- present,
5(2)89,
5(2)165,
6(2)7,
6(3)10,
6(4)2,
7(1)2,
7(1)3,
7(2)7,
7(4)11,
7(4)13,
8(1)3,
8(2)6,
8(2)7,
8(2)8,
8(3)10,
8(4)14,
8(4)16,
8(4)17,
8(4)18,
8(4)19,
9(1)1,
9(1)2,
9(1)3,
9(2)6,
9(4)14,
10(1)4,
10(1)6,
10(2)7,
10(3)14,
10(4)18,
10(4)19,
11(1)2,
11(1)3,
11(2)4,
11(2)5,
11(2)6,
11(3)10,
11(4)13,
12(1)3,
12(2)5,
12(3)9,
12(3)11,
12(4)15,
13(2)8,
13(4)16
- presented,
6(2)8,
6(4)3,
8(4)15,
11(1)3
- prior,
11(2)4,
13(3)14
- process,
5(2)121,
6(2)6,
6(3)10,
7(1)1,
10(2)10,
10(4)18,
12(3)9,
13(1)4,
13(2)9
- proposed,
5(2)121,
5(2)165,
6(2)7,
7(1)1,
7(1)2,
7(3)10,
7(4)11,
7(4)13,
8(1)4,
8(2)6,
8(3)10,
8(3)11,
8(4)14,
8(4)19,
9(1)1,
9(2)5,
9(2)7,
10(2)7,
10(2)9,
10(3)14,
10(4)18,
11(1)3,
11(2)5,
11(2)6,
11(2)7,
11(3)8,
11(3)9,
11(3)10,
11(3)11,
11(4)16,
11(4)17,
12(1)4,
12(2)5,
12(2)7,
12(3)12,
12(4)16,
12(4)17,
13(2)6,
13(2)8,
13(3)13,
13(4)18
- recall,
10(2)8,
11(4)18
- resource,
2(2)101,
2(2)124,
4(2)135,
5(2)89,
7(4)11,
8(2)9,
8(3)12,
8(4)17,
9(2)5,
9(3)12,
9(4)14,
10(2)8,
10(3)12,
10(3)16,
10(4)19,
11(4)14,
11(4)16,
11(4)18,
12(1)1,
12(4)14,
12(4)16,
13(1)1,
13(1)3,
13(2)7
- respect,
8(4)15,
9(1)3,
11(2)6
- salient,
7(1)3,
11(2)6
- sequence,
5(2)165,
6(2)7,
6(3)10,
8(1)2,
8(1)3,
9(2)7,
9(3)12,
9(4)14,
10(1)4,
12(1)2,
12(1)3,
12(3)9,
12(3)10,
12(4)17,
13(1)2
- Sharma, Utpal,
13(3)14
- small,
6(3)11,
7(4)11,
8(2)9,
8(4)17,
9(1)3,
10(4)21,
12(2)7
- spoken,
2(1)1,
3(2)128,
6(3)10,
7(1)2,
8(1)1,
8(1)2,
8(1)3,
8(1)4,
8(4)18,
9(3)11,
12(2)6,
13(2)7
- suffix,
6(4)2,
8(4)16,
9(3)11,
12(3)11,
13(3)14
- suitable,
7(2)5,
9(3)12,
10(3)16,
11(2)4
- text,
1(1)34,
1(2)159,
3(3)190,
3(4)215,
4(1)38,
4(2)135,
4(4)435,
5(1)1,
5(2)165,
6(1)z-3,
6(3)10,
6(4)2,
7(2)6,
7(3)8,
8(1)4,
8(3)11,
8(4)14,
8(4)16,
8(4)18,
9(1)1,
9(3)10,
9(4)15,
10(3)14,
11(1)2,
11(2)4,
11(2)5,
11(4)13,
11(4)14,
11(4)15,
11(4)16,
11(4)17,
11(4)18,
12(1)2,
12(1)3,
12(2)6,
12(3)11,
12(4)15,
13(1)1,
13(1)4,
13(2)7,
13(2)8,
13(2)9,
13(2)10,
13(3)14
- there,
7(4)11,
8(2)7,
8(3)12,
8(4)17,
9(2)5,
9(3)12,
9(4)15,
10(1)2,
10(3)14,
12(1)2,
13(1)1,
13(2)8,
13(3)14
- time,
3(1)11,
3(1)66,
5(4)388,
6(4)3,
7(1)3,
11(3)8,
12(1)4,
12(2)6,
12(3)12,
13(1)1,
13(2)6,
13(3)12,
13(4)17
- unsupervised,
6(2)6,
8(1)3,
9(1)3,
9(1)4,
9(2)7,
10(2)7,
11(3)9,
13(3)12
- use,
4(2)159,
5(2)89,
5(2)146,
6(2)8,
6(3)11,
7(2)6,
7(4)11,
7(4)12,
8(1)3,
8(2)9,
8(3)10,
8(3)11,
9(1)1,
9(1)3,
9(3)11,
10(1)3,
10(1)4,
11(1)1,
11(2)6,
11(2)7,
11(3)8,
11(3)10,
11(4)14,
11(4)18,
12(1)1,
12(2)6,
12(3)9,
12(3)10,
13(2)6,
13(2)9,
13(2)10,
13(3)12
- used,
5(2)89,
5(2)146,
7(1)3,
7(2)6,
7(2)7,
7(4)12,
7(4)13,
8(3)10,
8(4)17,
9(1)1,
9(1)3,
9(2)6,
9(3)10,
10(1)2,
10(1)6,
10(2)7,
10(2)8,
10(3)12,
10(3)13,
10(4)20,
11(1)2,
11(1)3,
11(3)10,
11(4)13,
11(4)14,
12(2)5,
12(3)9,
12(3)11,
12(3)12,
13(2)6,
13(3)11
- well,
5(2)121,
6(2)6,
6(3)11,
7(3)8,
7(3)10,
8(1)2,
8(4)18,
9(1)2,
9(3)12,
10(3)15,
11(1)2,
11(2)4,
11(3)11,
12(3)9,
13(1)2
- where,
7(3)8,
7(3)10,
8(2)7,
8(2)8,
9(1)3,
9(2)7,
9(4)13,
10(4)18,
11(2)4,
11(4)16,
12(1)3,
12(4)14
- widely,
6(4)3,
10(3)13,
12(4)17,
13(3)11
- work,
5(2)121,
6(2)6,
6(3)11,
6(4)2,
7(2)7,
8(4)19,
9(2)5,
9(4)15,
10(1)4,
10(2)10,
12(1)3,
12(3)9,
13(1)1,
13(2)9,
13(3)14,
13(4)18
- written,
7(3)8,
8(4)18