@Preamble{
"\hyphenation{ }"
}
@String{ack-nhfb = "Nelson H. F. Beebe,
University of Utah,
Department of Mathematics, 110 LCB,
155 S 1400 E RM 233,
Salt Lake City, UT 84112-0090, USA,
Tel: +1 801 581 5254,
FAX: +1 801 581 4148,
e-mail: \path|beebe@math.utah.edu|,
\path|beebe@acm.org|,
\path|beebe@computer.org| (Internet),
URL: \path|https://www.math.utah.edu/~beebe/|"}
@String{j-TALLIP = "ACM Transactions on Asian and Low-Resource
Language Information Processing (TALLIP)"}
@Article{Uematsu:2015:IMD,
author = "Sumire Uematsu and Takuya Matsuzaki and Hiroki Hanaoka
and Yusuke Miyao and Hideki Mima",
title = "Integrating Multiple Dependency Corpora for Inducing
Wide-Coverage {Japanese} {CCG} Resources",
journal = j-TALLIP,
volume = "14",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2658997",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:48 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "A novel method to induce wide-coverage Combinatory
Categorial Grammar (CCG) resources for Japanese is
proposed in this article. For some languages including
English, the availability of large annotated corpora
and the development of data-based induction of
lexicalized grammar have enabled deep parsing, i.e.,
parsing based on lexicalized grammars. However, deep
parsing for Japanese has not been widely studied. This
is mainly because most Japanese syntactic resources are
represented in chunk-based dependency structures, while
previous methods for inducing grammars are dependent on
tree corpora. To translate syntactic information
presented in chunk-based dependencies to phrase
structures as accurately as possible, integration of
annotation from multiple dependency-based corpora is
proposed. Our method first integrates dependency
structures and predicate-argument information and
converts them into phrase structure trees. The trees
are then transformed into CCG derivations in a similar
way to previously proposed methods. The quality of the
conversion is empirically evaluated in terms of the
coverage of the obtained CCG lexicon and the accuracy
of the parsing with the grammar. While the transforming
process used in this study is specialized for Japanese,
the framework of our method would be applicable to
other languages for which dependency-based analysis has
been regarded as more appropriate than phrase
structure-based analysis due to morphosyntactic
features.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ramrakhiyani:2015:ATE,
author = "Nitin Ramrakhiyani and Prasenjit Majumder",
title = "Approaches to Temporal Expression Recognition in
{Hindi}",
journal = j-TALLIP,
volume = "14",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629574",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:48 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Temporal annotation of plain text is considered a
useful component of modern information retrieval tasks.
In this work, different approaches for identification
and classification of temporal expressions in Hindi are
developed and analyzed. First, a rule-based approach is
developed, which takes plain text as input and based on
a set of hand-crafted rules, produces a tagged output
with identified temporal expressions. This approach
performs with a strict F1-measure of 0.83. In another
approach, a CRF-based classifier is trained with human
tagged data and is then tested on a test dataset. The
trained classifier identifies the time expressions from
plain text and further classifies them to various
classes. This approach performs with a strict
F1-measure of 0.78. Next, the CRF is replaced by an
SVM-based classifier and the same experiment is
performed with the same features. This approach is
shown to be comparable to the CRF and performs with a
strict F1-measure of 0.77. Using the rule base
information as an additional feature enhances the
performances to 0.86 and 0.84 for the CRF and SVM
respectively. With three different comparable systems
performing the extraction task, merging them to take
advantage of their positives is the next step. As the
first merge experiment, rule-based tagged data is fed
to the CRF and SVM classifiers as additional training
data. Evaluation results report an increase in
F1-measure of the CRF from 0.78 to 0.8. Second, a
voting-based approach is implemented, which chooses the
best class for each token from the outputs of the three
approaches. This approach results in the best
performance for this task with a strict F1-measure of
0.88. In this process a reusable gold standard dataset
for temporal tagging in Hindi is also developed. Named
the ILTIMEX2012 corpus, it consists of 300 manually
tagged Hindi news documents.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "2",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kumari:2015:ITD,
author = "B. Venkata Seshu Kumari and Ramisetty Rajeshwara Rao",
title = "Improving {Telugu} Dependency Parsing using
Combinatory Categorial Grammar Supertags",
journal = j-TALLIP,
volume = "14",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2693190.2693191",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:48 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "We show that Combinatory Categorial Grammar (CCG)
supertags can improve Telugu dependency parsing. In
this process, we first extract a CCG lexicon from the
dependency treebank. Using both the CCG lexicon and the
dependency treebank, we create a CCG treebank using a
chart parser. Exploring different morphological
features of Telugu, we develop a supertagger using
maximum entropy models. We provide CCG supertags as
features to the Telugu dependency parser (MST parser).
We get an improvement of 1.8\% in the unlabelled
attachment score and 2.2\% in the labelled attachment
score. Our results show that CCG supertags improve the
MST parser, especially on verbal arguments for which it
has weak rates of recovery.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "3",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ketui:2015:EBA,
author = "Nongnuch Ketui and Thanaruk Theeramunkong and
Chutamanee Onsuwan",
title = "An {EDU}-Based Approach for {Thai} Multi-Document
Summarization and Its Application",
journal = j-TALLIP,
volume = "14",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2641567",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:48 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Due to lack of a word/phrase/sentence boundary,
summarization of Thai multiple documents has several
challenges in unit segmentation, unit selection,
duplication elimination, and evaluation dataset
construction. In this article, we introduce Thai
Elementary Discourse Units (TEDUs) and their
derivatives, called Combined TEDUs (CTEDUs), and then
present our three-stage method of Thai multi-document
summarization, that is, unit segmentation, unit-graph
formulation, and unit selection and summary generation.
To examine performance of our proposed method, a number
of experiments are conducted using 50 sets of Thai news
articles with their manually constructed reference
summaries. Based on measures of ROUGE-1, ROUGE-2, and
ROUGE-SU4, the experimental results show that: (1) the
TEDU-based summarization outperforms paragraph-based
summarization; (2) our proposed graph-based TEDU
weighting with importance-based selection achieves the
best performance; and (3) unit duplication
consideration and weight recalculation help improve
summary quality.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "4",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sproat:2015:TPE,
author = "Richard Sproat",
title = "{TALLIP} Perspectives: Editorial Commentary: The
Broadened Focus of the Journal",
journal = j-TALLIP,
volume = "14",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2710043",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:48 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "5",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Shen:2015:MGA,
author = "Han-ping Shen and Chung-hsien Wu and Pei-shan Tsai",
title = "Model Generation of Accented Speech using Model
Transformation and Verification for Bilingual Speech
Recognition",
journal = j-TALLIP,
volume = "14",
number = "2",
pages = "6:1--6:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661637",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Nowadays, bilingual or multilingual speech recognition
is confronted with the accent-related problem caused by
non-native speech in a variety of real-world
applications. Accent modeling of non-native speech is
definitely challenging, because the acoustic properties
in highly-accented speech pronounced by non-native
speakers are quite divergent. The aim of this study is
to generate highly Mandarin-accented English models for
speakers whose mother tongue is Mandarin. First, a
two-stage, state-based verification method is proposed
to extract the state-level, highly-accented speech
segments automatically. Acoustic features and
articulatory features are successively used for robust
verification of the extracted speech segments. Second,
Gaussian components of the highly-accented speech
models are generated from the corresponding Gaussian
components of the native speech models using a linear
transformation function. A decision tree is constructed
to categorize the transformation functions and used for
transformation function retrieval to deal with the data
sparseness problem. Third, a discrimination function is
further applied to verify the generated accented
acoustic models. Finally, the successfully verified
accented English models are integrated into the native
bilingual phone model set for Mandarin-English
bilingual speech recognition. Experimental results show
that the proposed approach can effectively alleviate
recognition performance degradation due to accents and
can obtain absolute improvements of 4.1\%, 1.8\%, and
2.7\% in word accuracy for bilingual speech recognition
compared to that using traditional ASR approaches,
MAP-adapted, and MLLR-adapted ASR methods,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "6",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Awajan:2015:KEA,
author = "Arafat Awajan",
title = "Keyword Extraction from {Arabic} Documents using Term
Equivalence Classes",
journal = j-TALLIP,
volume = "14",
number = "2",
pages = "7:1--7:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2665077",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "The rapid growth of the Internet and other computing
facilities in recent years has resulted in the creation
of a large amount of text in electronic form, which has
increased the interest in and importance of different
automatic text processing applications, including
keyword extraction and term indexing. Although keywords
are very useful for many applications, most documents
available online are not provided with keywords. We
describe a method for extracting keywords from Arabic
documents. This method identifies the keywords by
combining linguistics and statistical analysis of the
text without using prior knowledge from its domain or
information from any related corpus. The text is
preprocessed to extract the main linguistic
information, such as the roots and morphological
patterns of derivative words. A cleaning phase is then
applied to eliminate the meaningless words from the
text. The most frequent terms are clustered into
equivalence classes in which the derivative words
generated from the same root and the non-derivative
words generated from the same stem are placed together,
and their count is accumulated. A vector space model is
then used to capture the most frequent N-gram in the
text. Experiments carried out using a real-world
dataset show that the proposed method achieves good
results with an average precision of 31\% and average
recall of 53\% when tested against manually assigned
keywords.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "7",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sundaram:2015:BLM,
author = "Suresh Sundaram and A. G. Ramakrishnan",
title = "Bigram Language Models and Reevaluation Strategy for
Improved Recognition of Online Handwritten {Tamil}
Words",
journal = j-TALLIP,
volume = "14",
number = "2",
pages = "8:1--8:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2671014",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article describes a postprocessing strategy for
online, handwritten, isolated Tamil words.
Contributions have been made with regard to two issues
hardly addressed in the online Indic word recognition
literature, namely, use of (1) language models
exploiting the idiosyncrasies of Indic scripts and (2)
expert classifiers for the disambiguation of confused
symbols. The input word is first segmented into its
individual symbols, which are recognized using a
primary support vector machine (SVM) classifier.
Thereafter, we enhance the recognition accuracy by
utilizing (i) a bigram language model at the symbol or
character level and (ii) expert classifiers for
reevaluating and disambiguating the different sets of
confused symbols. The symbol-level bigram model is used
in a traditional Viterbi framework. The concept of a
character comprising multiple symbols is unique to
Dravidian languages such as Tamil. This multi-symbol
feature of Tamil characters has been exploited in
proposing a novel, prefix-tree-based character-level
bigram model that does not use Viterbi search; rather
it reduces the search space for each input symbol based
on its left context. For disambiguating confused
symbols, a dynamic time-warping approach is proposed to
automatically identify the parts of the online trace
that discriminates between the confused classes. Fine
classification of these regions by dedicated expert
SVMs reduces the extent of confusions between such
symbols. The integration of segmentation,
prefix-tree-based language model and disambiguation of
confused symbols is presented on a set of 15,000
handwritten isolated online Tamil words. Our results
show recognition accuracies of 93.0\% and 81.6\% at the
symbol and word level, respectively, as compared to the
baseline classifier performance of 88.4\% and 65.1\%,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "8",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhang:2015:TMT,
author = "Jiajun Zhang and Shujie Liu and Mu Li and Ming Zhou
and Chengqing Zong",
title = "Towards Machine Translation in Semantic Vector Space",
journal = j-TALLIP,
volume = "14",
number = "2",
pages = "9:1--9:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2699927",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Measuring the quality of the translation rules and
their composition is an essential issue in the
conventional statistical machine translation (SMT)
framework. To express the translation quality, the
previous lexical and phrasal probabilities are
calculated only according to the co-occurrence
statistics in the bilingual corpus and may be not
reliable due to the data sparseness problem. To address
this issue, we propose measuring the quality of the
translation rules and their composition in the semantic
vector embedding space (VES). We present a recursive
neural network (RNN)-based translation framework, which
includes two submodels. One is the
bilingually-constrained recursive auto-encoder, which
is proposed to convert the lexical translation rules
into compact real-valued vectors in the semantic VES.
The other is a type-dependent recursive neural network,
which is proposed to perform the decoding process by
minimizing the semantic gap (meaning distance) between
the source language string and its translation
candidates at each state in a bottom-up structure. The
RNN-based translation model is trained using a
max-margin objective function that maximizes the margin
between the reference translation and the n-best
translations in forced decoding. In the experiments, we
first show that the proposed vector representations for
the translation rules are very reliable for application
in translation modeling. We further show that the
proposed type-dependent, RNN-based model can
significantly improve the translation quality in the
large-scale, end-to-end Chinese-to-English translation
evaluation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "9",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Na:2015:CRF,
author = "Seung-Hoon Na",
title = "Conditional Random Fields for {Korean} Morpheme
Segmentation and {POS} Tagging",
journal = j-TALLIP,
volume = "14",
number = "3",
pages = "10:1--10:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700051",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "There has been recent interest in statistical
approaches to Korean morphological analysis. However,
previous studies have been based mostly on generative
models, including a hidden Markov model (HMM), without
utilizing discriminative models such as a conditional
random field (CRF). We present a two-stage
discriminative approach based on CRFs for Korean
morphological analysis. Similar to methods used for
Chinese, we perform two disambiguation procedures based
on CRFs: (1) morpheme segmentation and (2) POS tagging.
In morpheme segmentation, an input sentence is
segmented into sequences of morphemes, where a morpheme
unit is either atomic or compound. In the POS tagging
procedure, each morpheme (atomic or compound) is
assigned a POS tag. Once POS tagging is complete, we
carry out a post-processing of the compound morphemes,
where each compound morpheme is further decomposed into
atomic morphemes, which is based on pre-analyzed
patterns and generalized HMMs obtained from the given
tagged corpus. Experimental results show the promise of
our proposed method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "10",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2015:MTM,
author = "Xiaodong Liu and Kevin Duh and Yuji Matsumoto",
title = "Multilingual Topic Models for Bilingual Dictionary
Extraction",
journal = j-TALLIP,
volume = "14",
number = "3",
pages = "11:1--11:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2699939",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "A machine-readable bilingual dictionary plays a
crucial role in many natural language processing tasks,
such as statistical machine translation and
cross-language information retrieval. In this article,
we propose a framework for extracting a bilingual
dictionary from comparable corpora by exploiting a
novel combination of topic modeling and word aligners
such as the IBM models. Using a multilingual topic
model, we first convert a comparable document-aligned
corpus into a parallel topic-aligned corpus. This
novel topic-aligned corpus is similar in structure to
the sentence-aligned corpus frequently employed in
statistical machine translation and allows us to
extract a bilingual dictionary using a word alignment
model. The main advantages of our framework are that (1)
no seed dictionary is necessary for bootstrapping the
process, and (2) multilingual comparable corpora in
more than two languages can also be exploited. In our
experiments on a large-scale Wikipedia dataset, we
demonstrate that our approach can extract higher
precision dictionaries compared to previous approaches
and that our method improves further as we add more
languages to the dataset.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "11",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2015:UMS,
author = "Xiaoqing Li and Chengqing Zong and Keh-yih Su",
title = "A Unified Model for Solving the {OOV} Problem of
{Chinese} Word Segmentation",
journal = j-TALLIP,
volume = "14",
number = "3",
pages = "12:1--12:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2699940",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article proposes a unified, character-based,
generative model to incorporate additional resources
for solving the out-of-vocabulary (OOV) problem of
Chinese word segmentation, within which different types
of additional information can be utilized independently
in corresponding submodels. This article mainly
addresses the following three types of OOV: unseen
dictionary words, named entities, and suffix-derived
words, none of which are handled well by current
approaches. The results show that our approach can
effectively improve the performance of the first two
types with positive interaction in F-score.
Additionally, we also analyze the reason that suffix
information is not helpful. After integrating the
proposed generative model with the corresponding
discriminative approach, our evaluation on various
corpora---including SIGHAN-2005, CIPS-SIGHAN-2010, and
the Chinese Treebank (CTB)---shows that our integrated
approach achieves the best performance reported in the
literature on all testing sets when additional
information and resources are allowed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "12",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Goto:2015:PUT,
author = "Isao Goto and Masao Utiyama and Eiichiro Sumita and
Sadao Kurohashi",
title = "Preordering using a Target-Language Parser via
Cross-Language Syntactic Projection for Statistical
Machine Translation",
journal = j-TALLIP,
volume = "14",
number = "3",
pages = "13:1--13:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2699925",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "When translating between languages with widely
different word orders, word reordering can present a
major challenge. Although some word reordering methods
do not employ source-language syntactic structures,
such structures are inherently useful for word
reordering. However, high-quality syntactic parsers are
not available for many languages. We propose a
preordering method using a target-language syntactic
parser to process source-language syntactic structures
without a source-language syntactic parser. To train
our preordering model based on ITG, we produced
syntactic constituent structures for source-language
training sentences by (1) parsing target-language
training sentences, (2) projecting constituent
structures of the target-language sentences to the
corresponding source-language sentences, (3) selecting
parallel sentences with highly synchronized parallel
structures, (4) producing probabilistic models for
parsing using the projected partial structures and the
Pitman-Yor process, and (5) parsing to produce full
binary syntactic structures maximally synchronized with
the corresponding target-language syntactic structures,
using the constraints of the projected partial
structures and the probabilistic models. Our ITG-based
preordering model is trained using the produced binary
syntactic structures and word alignments. The proposed
method facilitates the learning of ITG by producing
highly synchronized parallel syntactic structures based
on cross-language syntactic projection and sentence
selection. The preordering model jointly parses input
sentences and identifies their reordered structures.
Experiments with Japanese--English and Chinese--English
patent translation indicate that our method outperforms
existing methods, including string-to-tree syntax-based
SMT, a preordering method that does not require a
parser, and a preordering method that uses a
source-language dependency parser.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "13",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Costa-Jussa:2016:DCS,
author = "Marta R. Costa-Juss{\`a} and Jordi Centelles",
title = "Description of the {Chinese}-to-{Spanish} Rule-Based
Machine Translation System Developed Using a Hybrid
Combination of Human Annotation and Statistical
Techniques",
journal = j-TALLIP,
volume = "15",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2738045",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:50 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Two of the most popular Machine Translation (MT)
paradigms are rule based (RBMT) and corpus based, which
include the statistical systems (SMT). When scarce
parallel corpus is available, RBMT becomes particularly
attractive. This is the case of the Chinese--Spanish
language pair. This article presents the first RBMT
system for Chinese to Spanish. We describe a hybrid
method for constructing this system taking advantage of
available resources such as parallel corpora that are
used to extract dictionaries and lexical and structural
transfer rules. The final system is freely available
online and open source. Although performance lags
behind standard SMT systems for an in-domain test set,
the results show that the RBMT's coverage is
competitive and it outperforms the SMT system in an
out-of-domain test set. This RBMT system is available
to the general public, it can be further enhanced, and
it opens up the possibility of creating future hybrid
MT systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Khanduja:2016:HFE,
author = "Deepti Khanduja and Neeta Nain and Subhash Panwar",
title = "A Hybrid Feature Extraction Algorithm for {Devanagari}
Script",
journal = j-TALLIP,
volume = "15",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2710018",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:50 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "The efficiency of any character recognition technique
is directly dependent on the accuracy of the generated
feature set that could uniquely represent a character
and hence correctly recognize it. This article proposes
a hybrid approach combining the structural features of
the character and a mathematical model of curve fitting
to simulate the best features of a character. As a
preprocessing step, skeletonization of the character is
performed using an iterative thinning algorithm based
on Raster scan of the character image. Then, a
combination of structural features of the character
like number of endpoints, loops, and intersection
points is calculated. Further, the thinned character
image is statistically zoned into partitions, and a
quadratic curve-fitting model is applied on each
partition forming a feature vector of the coefficients
of the optimally fitted curve. This vector is combined
with the spatial distribution of the foreground pixels
for each zone and hence script-independent feature
representation. The approach has been evaluated
experimentally on Devanagari scripts. The algorithm
achieves an average recognition accuracy of 93.4\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "2",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Shatnawi:2016:IHA,
  author =       "Maad Shatnawi and Sherief Abdallah",
  title =        "Improving Handwritten {Arabic} Character Recognition
                 by Modeling Human Handwriting Distortions",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2764456",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Handwritten Arabic character recognition systems face
                 several challenges, including the unlimited variation
                 in human handwriting and the unavailability of large
                 public databases of handwritten characters and words.
                 The use of synthetic data for training and testing
                 handwritten character recognition systems is one of the
                 possible solutions to provide several variations for
                 these characters and to overcome the lack of large
                 databases. While this can be using arbitrary
                 distortions, such as image noise and randomized affine
                 transformations, such distortions are not realistic. In
                 this work, we model real distortions in handwriting
                 using real handwritten Arabic character examples and
                 then use these distortion models to synthesize
                 handwritten examples that are more realistic. We show
                 that the use of our proposed approach leads to
                 significant improvements across different
                 machine-learning classification algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wushouer:2016:CAP,
  author =       "Mairidan Wushouer and Donghui Lin and Toru Ishida and
                 Katsutoshi Hirayama",
  title =        "A Constraint Approach to Pivot-Based Bilingual
                 Dictionary Induction",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2723144",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "High-quality bilingual dictionaries are very useful,
                 but such resources are rarely available for
                 lower-density language pairs, especially for those that
                 are closely related. Using a third language to link two
                 other languages is a well-known solution and usually
                 requires only two input bilingual dictionaries A-B and
                 B-C to automatically induce the new one, A-C. This
                 approach, however, has never been demonstrated to
                 utilize the complete structures of the input bilingual
                 dictionaries, and this is a key failing because the
                 dropped meanings negatively influence the result. This
                 article proposes a constraint approach to pivot-based
                 dictionary induction where language A and C are closely
                 related. We create constraints from language similarity
                 and model the structures of the input dictionaries as a
                 Boolean optimization problem, which is then formulated
                 within the Weighted Partial Max-SAT framework, an
                 extension of Boolean Satisfiability (SAT). All of the
                 encoded CNF (Conjunctive Normal Form), the predominant
                 input language of modern SAT/MAX-SAT solvers, formulas
                 are evaluated by a solver to produce the target
                 (output) bilingual dictionary. Moreover, we discuss
                 alternative formalizations as a comparison study. We
                 designed a tool that uses the Sat4j library as the
                 default solver to implement our method and conducted an
                 experiment in which the output bilingual dictionary
                 achieved better quality than the baseline method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yeh:2016:SAI,
  author =       "Jui-Feng Yeh",
  title =        "Speech Act Identification Using Semantic Dependency
                 Graphs with Probabilistic Context-Free Grammars",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2786978",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "We propose an approach for identifying the speech acts
                 of speakers' utterances in conversational spoken
                 dialogue that involves using semantic dependency graphs
                 with probabilistic context-free grammars (PCFGs). The
                 semantic dependency graph based on the HowNet knowledge
                 base is adopted to model the relationships between
                 words in an utterance parsed by PCFG. Dependency
                 relationships between words within the utterance are
                 extracted by decomposing the semantic dependency graph
                 according to predefined events. The corresponding
                 values of semantic slots are subsequently extracted
                 from the speaker's utterances according to the
                 corresponding identified speech act. The experimental
                 results obtained when using the proposed approach
                 indicated that the accuracy rates of speech act
                 detection and task completion were 95.6\% and 77.4\%
                 for human-generated transcription (REF) and
                 speech-to-text recognition output (STT), respectively,
                 and the average numbers of turns of each dialogue were
                 8.3 and 11.8 for REF and STT, respectively. Compared
                 with Bayes classifier, partial pattern tree, and
                 Bayesian-network-based approaches, we obtained 14.1\%,
                 9.2\%, and 3\% improvements in the accuracy of speech
                 act identification, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2016:CCSa,
  author =       "Ting-Xuan Wang and Wen-Hsiang Lu",
  title =        "Constructing Complex Search Tasks with Coherent
                 Subtask Search Goals",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2742547",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Nowadays, due to the explosive growth of web content
                 and usage, users deal with their complex search tasks
                 by web search engines. However, conventional search
                 engines consider a search query corresponding only to a
                 simple search task. In order to accomplish a complex
                 search task, which consists of multiple subtask search
                 goals, users usually have to issue a series of queries.
                 For example, the complex search task ``travel to
                 Dubai'' may involve several subtask search goals,
                 including reserving hotel room, surveying Dubai
                 landmarks, booking flights, and so forth. Therefore, a
                 user can efficiently accomplish his or her complex
                 search task if search engines can predict the complex
                 search task with a variety of subtask search goals. In
                 this work, we propose a complex search task model
                 (CSTM) to deal with this problem. The CSTM first groups
                 queries into complex search task clusters, and then
                 generates subtask search goals from each complex search
                 task cluster. To raise the performance of CSTM, we
                 exploit four web resources including community question
                 answering, query logs, search engine result pages, and
                 clicked pages. Experimental results show that our CSTM
                 is effective in identifying the comprehensive subtask
                 search goals of a complex search task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Tsai:2016:CWB,
  author =       "Richard Tzong-Han Tsai",
  title =        "Collective {Web}-Based Parenthetical Translation
                 Extraction Using {Markov} Logic Networks",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2794399",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Parenthetical translations are translations of terms
                 in otherwise monolingual text that appear inside
                 parentheses. Parenthetical translations extraction
                 (PTE) is the task of extracting parenthetical
                 translations from natural language documents. One of
                 the main difficulties in PTE is to detect the left
                 boundary of the translated term in preparenthetical
                 text. In this article, we propose a collective approach
                 that employs Markov logic to model multiple constraints
                 used in the PTE task. We show how various constraints
                 can be formulated and combined in a Markov logic
                 network (MLN). Our experimental results show that the
                 proposed collective PTE approach significantly
                 outperforms a current state-of-the-art method,
                 improving the average F-measure up to 27.11\% compared
                 to the previous word alignment approach. It also
                 outperforms an individual MLN-based system by 8.2\% and
                 a system based on conditional random fields by 5.9\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Jain:2016:FHW,
  author =       "Amita Jain and D. K. Lobiyal",
  title =        "Fuzzy {Hindi} {WordNet} and Word Sense Disambiguation
                 Using Fuzzy Graph Connectivity Measures",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2790079",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, we propose Fuzzy Hindi WordNet, which
                 is an extended version of Hindi WordNet. The proposed
                 idea of fuzzy relations and their role in modeling
                 Fuzzy Hindi WordNet is explained. We mathematically
                 define fuzzy relations and the composition of these
                 fuzzy relations for this extended version. We show that
                 the concept of composition of fuzzy relations can be
                 used to infer a relation between two words that
                 otherwise are not directly related in Hindi WordNet.
                 Then we propose fuzzy graph connectivity measures that
                 include both local and global measures. These measures
                 are used in determining the significance of a concept
                 (which is represented as a vertex in the fuzzy graph)
                 in a specific context. Finally, we show how these
                 extended measures solve the problem of word sense
                 disambiguation (WSD) effectively, which is useful in
                 many natural language processing applications to
                 improve their performance. Experiments on standard
                 sense tagged corpus for WSD show better results when
                 Fuzzy Hindi WordNet is used in place of Hindi
                 WordNet.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kertkeidkachorn:2016:AFH,
  author =       "Natthawut Kertkeidkachorn and Proadpran Punyabukkana
                 and Atiwong Suchato",
  title =        "Acoustic Features for Hidden Conditional Random
                 Fields-Based {Thai} Tone Classification",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2833088",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In the Thai language, tone information is necessary
                 for Thai speech recognition systems. Previous studies
                 show that many acoustic cues are attributed to shapes
                 of tones. Nevertheless, most Thai tone classification
                 studies mainly adopted F$_0$ values and their
                 derivatives without considering other acoustic
                 features. In this article, other acoustic features for
                 Thai tone classification are investigated. In the
                 experiment, energy values and spectral information
                 represented by three spectral-based features including
                 the LPC-based feature, PLP-based feature, and
                 MFCC-based feature are applied to the HCRF-based Thai
                 tone classification, which was reported as the best
                 approach for Thai tone classification. The energy
                 values provide an error rate reduction of 22.40\% in
                 the isolated word scenario, while there are slight
                 improvements in the continuous speech scenario. On the
                 contrary, spectral-based features greatly contribute to
                 Thai tone classification in the continuous-speech
                 scenario, whereas spectral-based features slightly
                 degrade performances in the isolated-word scenario. The
                 best achievement in the continuous-speech scenario is
                 obtained from the PLP-based feature, which yields an
                 error rate reduction of 13.90\%. Therefore, findings in
                 this article are that energy values and spectral-based
                 features, especially the PLP-based feature, are the
                 main contributors to the improvement of the
                 performances of Thai tone classification in the
                 isolated-word scenario and the continuous-speech
                 scenario, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Chu:2016:IPS,
  author =       "Chenhui Chu and Toshiaki Nakazawa and Sadao
                 Kurohashi",
  title =        "Integrated Parallel Sentence and Fragment Extraction
                 from Comparable Corpora: a Case Study on
                 {Chinese--Japanese} {Wikipedia}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2833089",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Parallel corpora are crucial for statistical machine
                 translation (SMT); however, they are quite scarce for
                 most language pairs and domains. As comparable corpora
                 are far more available, many studies have been
                 conducted to extract either parallel sentences or
                 fragments from them for SMT. In this article, we
                 propose an integrated system to extract both parallel
                 sentences and fragments from comparable corpora. We
                 first apply parallel sentence extraction to identify
                 parallel sentences from comparable sentences. We then
                 extract parallel fragments from the comparable
                 sentences. Parallel sentence extraction is based on a
                 parallel sentence candidate filter and classifier for
                 parallel sentence identification. We improve it by
                 proposing a novel filtering strategy and three novel
                 feature sets for classification. Previous studies have
                 found it difficult to accurately extract parallel
                 fragments from comparable sentences. We propose an
                 accurate parallel fragment extraction method that uses
                 an alignment model to locate the parallel fragment
                 candidates and an accurate lexicon-based filter to
                 identify the truly parallel fragments. A case study on
                 the Chinese--Japanese Wikipedia indicates that our
                 proposed methods outperform previously proposed
                 methods, and the parallel data extracted by our system
                 significantly improves SMT performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2016:CCSb,
  author =       "Rui Wang and Masao Utiyama and Isao Goto and Eiichiro
                 Sumita and Hai Zhao and Bao-Liang Lu",
  title =        "Converting Continuous-Space Language Models into
                 {$N$}-gram Language Models with Efficient Bilingual
                 Pruning for Statistical Machine Translation",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "11:1--11:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2843942",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The Language Model (LM) is an essential component of
                 Statistical Machine Translation (SMT). In this article,
                 we focus on developing efficient methods for LM
                 construction. Our main contribution is that we propose
                 a Natural N-grams based Converting (NNGC) method for
                 transforming a Continuous-Space Language Model (CSLM)
                 to a Back-off N-gram Language Model (BNLM).
                 Furthermore, a Bilingual LM Pruning (BLMP) approach is
                 developed for enhancing LMs in SMT decoding and
                 speeding up CSLM converting. The proposed pruning and
                 converting methods can convert a large LM efficiently
                 by working jointly. That is, a LM can be effectively
                 pruned before it is converted from CSLM without
                 sacrificing performance, and further improved if an
                 additional corpus contains out-of-domain information.
                 For different SMT tasks, our experimental results
                 indicate that the proposed NNGC and BLMP methods
                 outperform the existing counterpart approaches
                 significantly in BLEU and computational cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Chakrabarty:2016:BBL,
  author =       "Abhisek Chakrabarty and Utpal Garain",
  title =        "{BenLem} (A {Bengali} Lemmatizer) and Its Role in
                 {WSD}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2835494",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A lemmatization algorithm for Bengali has been
                 developed and evaluated. Its effectiveness for word
                 sense disambiguation (WSD) is also investigated. One of
                 the key challenges for computer processing of highly
                 inflected languages is to deal with the frequent
                 morphological variations of the root words appearing in
                 the text. Therefore, a lemmatizer is essential for
                 developing natural language processing (NLP) tools for
                 such languages. In this experiment, Bengali, which is
                 the national language of Bangladesh and the second most
                 popular language in the Indian subcontinent, has been
                 taken as a reference. In order to design the Bengali
                 lemmatizer (named as BenLem), possible transformations
                 through which surface words are formed from lemmas are
                 studied so that appropriate reverse transformations can
                 be applied on a surface word to get the corresponding
                 lemma back. BenLem is found to be capable of handling
                 both inflectional and derivational morphology in
                 Bengali. It is evaluated on a set of 18 news articles
                 taken from the FIRE Bengali News Corpus consisting of
                 3,342 surface words (excluding proper nouns) and found
                 to be 81.95\% accurate. The role of the lemmatizer is
                 then investigated for Bengali WSD. Ten highly
                 polysemous Bengali words are considered for sense
                 disambiguation. The FIRE corpus and a collection of
                 Tagore's short stories are considered for creating the
                 WSD dataset. Different WSD systems are considered for
                 this experiment, and it is noticed that BenLem improves
                 the performance of all the WSD systems and the
                 improvements are statistically significant.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Zhou:2016:ESR,
  author =       "Hao Zhou and Shujian Huang and Junsheng Zhou and Yue
                 Zhang and Huadong Chen and Xinyu Dai and Chuan Cheng
                 and Jiajun Chen",
  title =        "Enhancing Shift--Reduce Constituent Parsing with
                 Action {$N$}-Gram Model",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820902",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Current shift-reduce parsers ``understand'' the
                 context by embodying a large number of binary indicator
                 features with a discriminative model. In this article,
                 we propose the action n-gram model, which utilizes the
                 action sequence to help parsing disambiguation. The
                 action n-gram model is trained on action sequences
                 produced by parsers with the n-gram estimation method,
                 which gives a smoothed maximum likelihood estimation of
                 the action probability given a specific action history.
                 We show that incorporating action n-gram models into a
                 state-of-the-art parsing framework could achieve
                 parsing accuracy improvements on three datasets across
                 two languages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Sadek:2016:EAC,
  author =       "Jawad Sadek and Farid Meziane",
  title =        "Extracting {Arabic} Causal Relations Using Linguistic
                 Patterns",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2800786",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Identifying semantic relations is a crucial step in
                 discourse analysis and is useful for many applications
                 in both language and speech technology. Automatic
                 detection of Causal relations therefore has gained
                 popularity in the literature within different
                 frameworks. The aim of this article is the automatic
                 detection and extraction of Causal relations that are
                 explicitly expressed in Arabic texts. To fulfill this
                 goal, a Pattern Recognizer model was developed to
                 signal the presence of cause--effect information within
                 sentences from nonspecific domain texts. This model
                 incorporates approximately 700 linguistic patterns so
                 that parts of the sentence representing the cause and
                 those representing the effect can be distinguished. The
                 patterns were constructed based on different sets of
                 syntactic features by analyzing a large untagged Arabic
                 corpus. In addition, the model was boosted with three
                 independent algorithms to deal with certain types of
                 grammatical particles that indicate causation. With
                 this approach, the proposed model achieved an overall
                 recall of 81\% and a precision of 78\%. Evaluation
                 results revealed that the justification particles play
                 a key role in detecting Causal relations. To the best
                 of our knowledge, no previous studies have been
                 dedicated to dealing with this type of relation in the
                 Arabic language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2016:BSR,
  author =       "Haitong Yang and Yu Zhou and Chengqing Zong",
  title =        "Bilingual Semantic Role Labeling Inference via Dual
                 Decomposition",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2835493",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article focuses on bilingual Semantic Role
                 Labeling (SRL); its goal is to annotate semantic roles
                 on both sides of the parallel bilingual texts
                 (bi-texts). Since rich bilingual information is
                 encoded, bilingual SRL has been applied in many
                 natural-language processing (NLP) tasks such as machine
                 translation (MT), cross-lingual information retrieval
                 (IR), and the like. A feasible way of performing
                 bilingual SRL is using monolingual SRL systems to
                 perform SRL on each side of bi-texts separately.
                 However, it is difficult to obtain consistent SRL
                 results on both sides of bi-texts in this way. Some
                 works have tried to jointly infer bilingual SRL because
                 there are many complementary language cues on both
                 sides of bi-texts and they reported better performance
                 than monolingual systems. However, there are two limits
                 in the existing methods. First, the existing methods
                 often require high inference costs due to the complex
                 objective function. Second, the existing methods fully
                 adopt the candidates generated by monolingual SRL
                 systems, but many candidates are discarded in the
                 argument pruning or identification stage of monolingual
                 systems. In this article, we propose two strategies to
                 overcome these limits. We utilize a simple but
                 efficient technique: Dual Decomposition to search for
                 consistent results for both sides of bi-texts. On the
                 other hand, we propose a method called Bi-Directional
                 Projection (BDP) to recover arguments discarded in
                 monolingual SRL systems. We evaluate our method on a
                 standard parallel benchmark: the OntoNotes dataset. The
                 experimental results show that our method yields
                 significant improvements over the state-of-the-art
                 monolingual systems. In addition, our approach is also
                 better and faster than existing methods due to BDP and
                 Dual Decomposition.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Li:2016:MMC,
  author =       "Maoxi Li and Mingwen Wang and Hanxi Li and Fan Xu",
  title =        "Modeling Monolingual Character Alignment for Automatic
                 Evaluation of {Chinese} Translation",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2815619",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Automatic evaluation of machine translations is an
                 important task. Most existing evaluation metrics rely
                 on matching the same word or letter n-grams. This
                 strategy leads to poor results on Chinese translations
                 because one has to rely merely on matching identical
                 characters. In this article, we propose a new
                 evaluation metric that allows different characters with
                 the same or similar meaning to match. An Indirect
                 Hidden Markov Model (IHMM) is proposed to align the
                 Chinese translation with human references at the
                 character level. In the model, the emission
                 probabilities are estimated by character similarity,
                 including character semantic similarity and character
                 surface similarity, and transition probabilities are
                 estimated by a heuristic distance-based distortion
                 model. When evaluating the submitted output of
                 English-to-Chinese translation systems in the IWSLT'08
                 CT-EC and NIST'08 EC tasks, the experimental results
                 indicate that the proposed metric has a significantly
                 better correlation with human evaluation than the
                 state-of-the-art machine translation metrics (i.e.,
                 BLEU, Meteor Universal, and TESLA-CELAB). This study
                 shows that it is important to allow different
                 characters to match in the evaluation of Chinese
                 translations and that the IHMM is a reasonable approach
                 for the alignment of Chinese characters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Abuaiadah:2016:UBM,
  author =       "Diab Abuaiadah",
  title =        "Using Bisect {$K$}-Means Clustering Technique in the
                 Analysis of {Arabic} Documents",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2812809",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, I have investigated the performance
                 of the bisect K-means clustering algorithm compared to
                 the standard K-means algorithm in the analysis of
                 Arabic documents. The experiments included five
                 commonly used similarity and distance functions
                 (Pearson correlation coefficient, cosine, Jaccard
                 coefficient, Euclidean distance, and averaged
                 Kullback--Leibler divergence) and three leading
                 stemmers. Using the purity measure, the bisect K-means
                 clearly outperformed the standard K-means in all
                 settings with varying margins. For the bisect K-means,
                 the best purity reached 0.927 when using the Pearson
                 correlation coefficient function, while for the
                 standard K-means, the best purity reached 0.884 when
                 using the Jaccard coefficient function. Removing stop
                 words significantly improved the results of the bisect
                 K-means but produced minor improvements in the results
                 of the standard K-means. Stemming provided additional
                 minor improvement in all settings except the
                 combination of the averaged Kullback--Leibler
                 divergence function and the root-based stemmer, where
                 the purity was deteriorated by more than 10\%. These
                 experiments were conducted using a dataset with nine
                 categories, each of which contains 300 documents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Elayeb:2016:ACL,
  author =       "Bilel Elayeb and Ibrahim Bounhas",
  title =        "{Arabic} Cross-Language Information Retrieval: a
                 Review",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2789210",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Cross-language information retrieval (CLIR) deals with
                 retrieving relevant documents in one language using
                 queries expressed in another language. As CLIR tools
                 rely on translation techniques, they are challenged by
                 the properties of highly derivational and flexional
                 languages like Arabic. Much work has been done on CLIR
                 for different languages including Arabic. In this
                 article, we introduce the reader to the motivations for
                 solving some problems related to Arabic CLIR
                 approaches. The evaluation of these approaches is
                 discussed starting from the 2001 and 2002 TREC Arabic
                 CLIR tracks, which aim to objectively evaluate CLIR
                 systems. We also study many other research works to
                 highlight the unresolved problems or those that require
                 further investigation. These works are discussed in the
                 light of a deep study of the specificities and the
                 tasks of Arabic information retrieval (IR). Particular
                 attention is given to translation techniques and CLIR
                 resources, which are key issues challenging Arabic
                 CLIR. To push research in this field, we discuss how a
                 new standard collection can improve Arabic IR and CLIR
                 tracks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@article{Zhao:2016:ALM,
  author          = {Yinggong Zhao and Shujian Huang and Xin-Yu Dai and
                     Jiajun Chen},
  title           = {Adaptation of Language Models for {SMT} Using Neural
                     Networks with Topic Information},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {3},
  pages           = {19:1--19:??},
  month           = mar,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2816816},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Neural network language models (LMs) are shown to be
                     effective in improving the performance of statistical
                     machine translation (SMT) systems. However,
                     state-of-the-art neural network LMs usually use words
                     before the current position as context and neglect
                     global topic information, which can help machine
                     translation (MT) systems to select better translation
                     candidates from a higher perspective. In this work, we
                     propose improvement of the state-of-the-art feedforward
                     neural language model with topic information. Two main
                     issues need to be tackled when adding topics into
                     neural network LMs for SMT: one is how to incorporate
                     topics to the neural network; the other is how to get
                     target-side topic distribution before translation. We
                     incorporate topics by appending topic distribution to
                     the input layer of a feedforward LM. We adopt a
                     multinomial logistic-regression (MLR) model to predict
                     the target-side topic distribution based on source side
                     information. Moreover, we propose a feedforward neural
                     network model to learn joint representations on the
                     source side for topic prediction. LM experiments
                     demonstrate that the perplexity on validation set can
                     be greatly reduced by the topic-enhanced feedforward
                     LM, and the prediction of target-side topics can be
                     improved dramatically with the MLR model equipped with
                     the joint source representations. A final MT
                     experiment, conducted on a large-scale Chinese--English
                     dataset, shows that our feedforward LM with predicted
                     topics improves the translation performance against a
                     strong baseline.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Ding:2016:IIE,
  author          = {Chenchen Ding and Keisuke Sakanushi and Hirona Touji
                     and Mikio Yamamoto},
  title           = {Inter-, Intra-, and Extra-Chunk Pre-Ordering for
                     Statistical {Japanese}-to-{English} Machine
                     Translation},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {3},
  pages           = {20:1--20:??},
  month           = mar,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2818381},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {A rule-based pre-ordering approach is proposed for
                     statistical Japanese-to-English machine translation
                     using the dependency structure of source-side
                     sentences. A Japanese sentence is pre-ordered to an
                     English-like order at the morpheme level for a
                     statistical machine translation system during the
                     training and decoding phase to resolve the reordering
                     problem. In this article, extra-chunk pre-ordering of
                     morphemes is proposed, which allows Japanese functional
                     morphemes to move across chunk boundaries. This
                     contrasts with the intra-chunk reordering used in
                     previous approaches, which restricts the reordering of
                     morphemes within a chunk. Linguistically oriented
                     discussions show that correct pre-ordering cannot be
                     realized without extra-chunk movement of morphemes. The
                     proposed approach is compared with five rule-based
                     pre-ordering approaches designed for
                     Japanese-to-English translation and with a language
                     independent statistical pre-ordering approach on a
                     standard patent dataset and on a news dataset obtained
                     by crawling Internet news sites. Two state-of-the-art
                     statistical machine translation systems, one
                     phrase-based and the other hierarchical phrase-based,
                     are used in experiments. Experimental results show that
                     the proposed approach outperforms the compared
                     approaches on automatic reordering measures (Kendall's
                     $ \tau $, Spearman's $ \rho $, fuzzy reordering score,
                     and test set RIBES) and on the automatic translation
                     precision measure of test set BLEU score.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Lee:2015:ISI,
  author          = {Lung-Hao Lee and Gina-Anne Levow and Shih-Hung Wu and
                     Chao-Lin Liu},
  title           = {Introduction to the Special Issue on {Chinese} Spell
                     Checking},
  journal         = j-TALLIP,
  volume          = {14},
  number          = {4},
  pages           = {14:1--14:??},
  month           = oct,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2818354},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                     https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  note            = {Special issue on Chinese spell checking.},
  abstract        = {This special issue contains four articles based on and
                     expanded from systems presented at the SIGHAN-7 Chinese
                     Spelling Check Bakeoff. We provide an overview of the
                     approaches and designs for Chinese spelling checkers
                     presented in these articles. We conclude this
                     introductory article with a summary of possible future
                     directions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@Article{Chen:2015:PFC,
author = "Kuan-Yu Chen and Hsin-Min Wang and Hsin-Hsi Chen",
title = "A Probabilistic Framework for {Chinese} Spelling
Check",
journal = j-TALLIP,
volume = "14",
number = "4",
pages = "15:1--15:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2826234",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:49 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib;
https://www.math.utah.edu/pub/tex/bib/tallip.bib",
note = "Special issue on Chinese spell checking.",
abstract = "Chinese spelling check (CSC) is still an unsolved
problem today since there are many homonymous or
homomorphous characters. Recently, more and more CSC
systems have been proposed. To the best of our
knowledge, language modeling is one of the major
components among these systems because of its
simplicity and moderately good predictive power. After
deeply analyzing the school of research, we are aware
that most of the systems only employ the conventional
$n$-gram language models. The contributions of this
article are threefold. First, we propose a novel
probabilistic framework for CSC, which naturally
combines several important components, such as the
substitution model and the language model, to inherit
their individual merits as well as to overcome their
limitations. Second, we incorporate the topic language
models into the CSC system in an unsupervised fashion.
The topic language models can capture the long-span
semantic information from a word (character) string
while the conventional $n$-gram language models can only
preserve the local regularity information. Third, we
further integrate Web resources with the proposed
framework to enhance the overall performance. Our
rigorously empirical experiments demonstrate the
consistent and utility performance of the proposed
framework in the CSC task.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "15",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@article{Liu:2015:HRA,
  author          = {Xiaodong Liu and Fei Cheng and Kevin Duh and Yuji
                     Matsumoto},
  title           = {A Hybrid Ranking Approach to {Chinese} Spelling
                     Check},
  journal         = j-TALLIP,
  volume          = {14},
  number          = {4},
  pages           = {16:1--16:??},
  month           = oct,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2822264},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                     https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  note            = {Special issue on Chinese spell checking.},
  abstract        = {We propose a novel framework for Chinese Spelling
                     Check (CSC), which is an automatic algorithm to detect
                     and correct Chinese spelling errors. Our framework
                     contains two key components: candidate generation and
                     candidate ranking. Our framework differs from previous
                     research, such as Statistical Machine Translation (SMT)
                     based model or Language Model (LM) based model, in that
                     we use both SMT and LM models as components of our
                     framework for generating the correction candidates, in
                     order to obtain maximum recall; to improve the
                     precision, we further employ a Support Vector Machines
                     (SVM) classifier to rank the candidates generated by
                     the SMT and the LM. Experiments show that our framework
                     outperforms other systems, which adopted the same or
                     similar resources as ours in the SIGHAN 7 shared task;
                     even comparing with the state-of-the-art systems, which
                     used more resources, such as a considerable large
                     dictionary, an idiom dictionary and other semantic
                     information, our framework still obtains competitive
                     results. Furthermore, to address the resource
                     scarceness problem for training the SMT model, we
                     generate around 2 million artificial training sentences
                     using the Chinese character confusion sets, which
                     include a set of Chinese characters with similar shapes
                     and similar pronunciations, provided by the SIGHAN 7
                     shared task.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Yeh:2015:CSC,
  author          = {Jui-Feng Yeh and Wen-Yi Chen and Mao-Chuan Su},
  title           = {{Chinese} Spelling Checker Based on an Inverted Index
                     List with a Rescoring Mechanism},
  journal         = j-TALLIP,
  volume          = {14},
  number          = {4},
  pages           = {17:1--17:??},
  month           = oct,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2826235},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                     https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  note            = {Special issue on Chinese spell checking.},
  abstract        = {An approach is proposed for Chinese spelling error
                     detection and correction, in which an inverted index
                     list with a rescoring mechanism is used. The inverted
                     index list is a structure for mapping from word to
                     desired sentence, and for representing nodes in
                     lattices constructed through character expansion
                     (according to predefined phonologically and visually
                     similar character sets). Pruning based on a contextual
                     dependency confidence measure was used to markedly
                     reduce the search space and computational complexity.
                     Relevant mapping relations between the original input
                     and desired input were obtained using a scoring
                     mechanism composed of class-based language and maximum
                     entropy correction models containing character, word,
                     and contextual features. The proposed method was
                     evaluated using data sets provided by SigHan 7 bakeoff.
                     The experimental results show that the proposed method
                     achieved acceptable performance in terms of recall rate
                     or precision rate in error sentence detection and error
                     location detection, and it outperformed other
                     approaches in error location detection and
                     correction.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Hsieh:2015:CCS,
  author          = {Yu-Ming Hsieh and Ming-Hong Bai and Shu-Ling Huang and
                     Keh-Jiann Chen},
  title           = {Correcting {Chinese} Spelling Errors with Word Lattice
                     Decoding},
  journal         = j-TALLIP,
  volume          = {14},
  number          = {4},
  pages           = {18:1--18:??},
  month           = oct,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2791389},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                     https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  note            = {Special issue on Chinese spell checking.},
  abstract        = {Chinese spell checkers are more difficult to develop
                     because of two language features: (1) there are no word
                     boundaries, and a character may function as a word or a
                     word morpheme; and (2) the Chinese character set
                     contains more than ten thousand characters. The former
                     makes it difficult for a spell checker to detect
                     spelling errors, and the latter makes it difficult for
                     a spell checker to construct error models. We develop a
                     word lattice decoding model for a Chinese spell checker
                     that addresses these difficulties. The model performs
                     word segmentation and error correction simultaneously,
                     thereby solving the word boundary problem. The model
                     corrects nonword errors as well as real-word errors. In
                     order to better estimate the error distribution of
                     large character sets for error models, we also propose
                     a methodology to extract spelling error samples
                     automatically from the Google web 1T corpus. Due to the
                     large quantity of data in the Google web 1T corpus,
                     many spelling error samples can be extracted, better
                     reflecting spelling error distributions in the real
                     world. Finally, in order to improve the spell checker
                     for real applications, we produce $n$-best suggestions
                     for spelling error corrections. We test our proposed
                     approach with the Bakeoff 2013 CSC Datasets; the
                     results show that the proposed methods with the error
                     model significantly outperform the performance of
                     Chinese spell checkers that do not use error models.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Anonymous:2015:TPE,
  author          = {Anonymous},
  title           = {{TALLIP} Perspectives: Editorial Commentary: The State
                     of the Journal},
  journal         = j-TALLIP,
  volume          = {14},
  number          = {4},
  pages           = {19:1--19:??},
  month           = oct,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2823512},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  note            = {Special issue on Chinese spell checking.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Hakro:2016:PTI,
  author          = {Dil Nawaz Hakro and Abdullah Zawawi Talib},
  title           = {Printed Text Image Database for {Sindhi} {OCR}},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {21:1--21:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2846093},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Document Image Understanding (DIU) and Electronic
                     Document Management are active fields of research
                     involving image understanding, interpretation,
                     efficient handling, and routing of documents as well as
                     their retrieval. Research on most of the noncursive
                     scripts (Latin) has matured, whereas research on the
                     cursive (connected) scripts is still moving toward
                     perfection. Many researchers are currently working on
                     the cursive scripts (Arabic and other scripts adopting
                     it) around the world so that the difficulties and
                     challenges in document understanding and handling of
                     these scripts can be overcome. Sindhi script has the
                     largest extension of the original Arabic alphabet among
                     languages adopting the Arabic script; it contains 52
                     characters, compared to 28 characters in the original
                     Arabic alphabet, in order to accommodate more sounds
                     for the language. There are 24 differentiating
                     characters with some possessing four dots. For Sindhi
                     OCR research and development, a database is needed for
                     training and testing of Sindhi text images. We have
                     developed a large database containing over 4 billion
                     words and 15 billion characters in 150 various fonts in
                     four font weights and four styles. The database
                     contents were collected from various sources including
                     websites, books, and theses. A custom-built application
                     was also developed to create a text image from a text
                     document that supports various fonts and sizes. The
                     database considers words, characters, characters with
                     spaces, and lines. The database is freely available as
                     a partial or full database by sending an email to one
                     of the authors.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Ding:2016:WSB,
  author          = {Chenchen Ding and Ye Kyaw Thu and Masao Utiyama and
                     Eiichiro Sumita},
  title           = {Word Segmentation for {Burmese} ({Myanmar})},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {22:1--22:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2846095},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Experiments on various word segmentation approaches
                     for the Burmese language are conducted and discussed in
                     this note. Specifically, dictionary-based, statistical,
                     and machine learning approaches are tested.
                     Experimental results demonstrate that statistical and
                     machine learning approaches perform significantly
                     better than dictionary-based approaches. We believe
                     that this note, based on an annotated corpus of
                     relatively considerable size (containing approximately
                     a half million words), is the first systematic
                     comparison of word segmentation approaches for Burmese.
                     This work aims to discover the properties and proper
                     approaches to Burmese textual processing and to promote
                     further researches on this understudied language.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Zhang:2016:ITP,
  author          = {Tongtao Zhang and Aritra Chowdhury and Nimit Dhulekar
                     and Jinjing Xia and Kevin Knight and Heng Ji and
                     B{\"u}lent Yener and Liming Zhao},
  title           = {From Image to Translation: Processing the Endangered
                     {Nyushu} Script},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {23:1--23:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2857052},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The lack of computational support has significantly
                     slowed down automatic understanding of endangered
                     languages. In this paper, we take Nyushu (simplified
                     Chinese: [Chinese characters]; literally: ``women's
                     writing'') as a case study to present the first
                     computational approach that combines Computer Vision
                     and Natural Language Processing techniques to deeply
                     understand an endangered language. We developed an
                     end-to-end system to read a scanned hand-written Nyushu
                     article, segment it into characters, link them to
                     standard characters, and then translate the article
                     into Mandarin Chinese. We propose several novel methods
                     to address the new challenges introduced by noisy input
                     and low resources, including Nyushu-specific feature
                     selection for character segmentation and linking, and
                     character linking lattice based Machine Translation.
                     The end-to-end system performance indicates that the
                     system is a promising approach and can serve as a
                     standard benchmark.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@Article{Sarigil:2016:SPW,
author = "Erdem Sarigil and Oguz Yilmaz and Ismail Sengor
Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy",
title = "A ``Suggested'' Picture of {Web} Search in {Turkish}",
journal = j-TALLIP,
volume = "15",
number = "4",
pages = "24:1--24:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2891105",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Although query log analysis provides crucial insights
about Web users' search interests, conducting such
analyses is almost impossible for some languages, as
large-scale and public query logs are quite scarce. In
this study, we first survey the existing query
collections in Turkish and discuss their limitations.
Next, we adopt a novel strategy to obtain a set of
Turkish queries using the query autocompletion services
from the four major search engines and provide the
first large-scale analysis of Web queries and their
results in Turkish.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "24",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@article{Goswami:2016:CPG,
  author          = {Mukesh M. Goswami and Suman K. Mitra},
  title           = {Classification of Printed {Gujarati} Characters Using
                     Low-Level Stroke Features},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {25:1--25:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2856105},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {This article presents an elegant technique for
                     extracting the low-level stroke features, such as
                     endpoints, junction points, line elements, and curve
                     elements, from offline printed text using a template
                     matching approach. The proposed features are used to
                     classify a subset of characters from Gujarati script.
                     The database consists of approximately 16,782 samples
                     of 42 middle-zone symbols from the Gujarati character
                     set collected from three different sources: machine
                     printed books, newspapers, and laser printed documents.
                     The purpose of this division is to add variety in terms
                     of size, font type, style, ink variation, and boundary
                     deformation. The experiments are performed on the
                     database using a k-nearest neighbor (kNN) classifier
                     and results are compared with other widely used
                     structural features, namely Chain Codes (CC),
                     Directional Element Features (DEF), and Histogram of
                     Oriented Gradients (HoG). The results show that the
                     features are quite robust against the variations and
                     give comparable performance with other existing
                     works.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@article{Choudhary:2016:FTA,
  author          = {Prakash Choudhary and Neeta Nain},
  title           = {A Four-Tier Annotated {Urdu} Handwritten Text Image
                     Dataset for Multidisciplinary Research on {Urdu}
                     Script},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {26:1--26:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2857053},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {This article introduces a large handwritten text
                     document image corpus dataset for Urdu script named
                     CALAM (Cursive And Language Adaptive Methodologies).
                     The database contains unconstrained handwritten
                     sentences along with their structural annotations for
                     the offline handwritten text images with their XML
                     representation. Urdu is the fourth most frequently used
                     language in the world, but due to its complex cursive
                     writing script and low resources, it is still a thrust
                     area for document image analysis. Here, a unified
                     approach is applied in the development of an Urdu
                     corpus by collecting printed texts, handwritten texts,
                     and demographic information of writers on a single
                     form. CALAM contains 1,200 handwritten text images,
                     3,043 lines, 46,664 words, and 101,181 ligatures. For
                     capturing maximum variance among the words and
                     handwritten styles, data collection is distributed
                     among six categories and 14 subcategories. Handwritten
                     forms were filled out by 725 different writers
                     belonging to different geographical regions, ages, and
                     genders with diverse educational backgrounds. A
                     structure has been designed to annotate handwritten
                     Urdu script images at line, word, and ligature levels
                     with an XML standard to provide a ground truth of each
                     image at different levels of annotation. This corpus
                     would be very useful for linguistic research in
                     benchmarking and providing a testbed for evaluation of
                     handwritten text recognition techniques for Urdu
                     script, signature verification, writer identification,
                     digital forensics, classification of printed and
                     handwritten text, categorization of texts as per use,
                     and so on. The experimental results of some recently
                     developed handwritten text line segmentation techniques
                     experimented on the proposed dataset are also presented
                     in the article for asserting its viability and
                     usability.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@Article{Norimatsu:2016:FCL,
author = "Jun-Ya Norimatsu and Makoto Yasuhara and Toru Tanaka
and Mikio Yamamoto",
title = "A Fast and Compact Language Model Implementation Using
Double-Array Structures",
journal = j-TALLIP,
volume = "15",
number = "4",
pages = "27:1--27:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2873068",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "The language model is a widely used component in
fields such as natural language processing, automatic
speech recognition, and optical character recognition.
In particular, statistical machine translation uses
language models, and the translation speed and the
amount of memory required are greatly affected by the
performance of the language model implementation. We
propose a fast and compact implementation of $n$-gram
language models that increases query speed and reduces
memory usage by using a double-array structure, which
is known to be a fast and compact trie data structure.
We propose two types of implementation: one for
backward suffix trees and the other for reverse tries.
The data structure is optimized for space efficiency by
embedding model parameters into otherwise unused spaces
in the double-array structure. We show that the reverse
trie version of our method is among the smallest
state-of-the-art implementations in terms of model size
with almost the same speed as the implementation that
performs fastest on perplexity calculation tasks.
Similarly, we achieve faster decoding while keeping
compact model sizes, and we confirm that our method can
utilize the efficiency of the double-array structure to
achieve a balance between speed and size on translation
tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "27",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2016:LGF,
author = "Haitong Yang and Chengqing Zong",
title = "Learning Generalized Features for Semantic Role
Labeling",
journal = j-TALLIP,
volume = "15",
number = "4",
pages = "28:1--28:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2890496",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article makes an effort to improve Semantic Role
Labeling (SRL) through learning generalized features.
The SRL task is usually treated as a supervised
problem. Therefore, a huge set of features are crucial
to the performance of SRL systems. But these features
often lack generalization powers when predicting an
unseen argument. This article proposes a simple
approach to relieve the issue. A strong intuition is
that arguments occurring in similar syntactic positions
are likely to bear the same semantic role, and,
analogously, arguments that are lexically similar are
likely to represent the same semantic role. Therefore,
it will be informative to SRL if syntactic or lexical
similar arguments can activate the same feature.
Inspired by this, we embed the information of
lexicalization and syntax into a feature vector for
each argument and then use $K$-means to make clustering
for all feature vectors of training set. For an unseen
argument to be predicted, it will belong to the same
cluster as its similar arguments of training set.
Therefore, the clusters can be thought of as a kind of
generalized feature. We evaluate our method on several
benchmarks. The experimental results show that our
approach can significantly improve the SRL
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "28",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@article{Bhowmik:2016:BHC,
  author          = {Tapan Kumar Bhowmik and Swapan Kumar Parui and Utpal
                     Roy and Lambert Schomaker},
  title           = {{Bangla} Handwritten Character Segmentation Using
                     Structural Features: a Supervised and Bootstrapping
                     Approach},
  journal         = j-TALLIP,
  volume          = {15},
  number          = {4},
  pages           = {29:1--29:??},
  month           = jun,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2890497},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Apr 3 08:15:51 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {In this article, we propose a new framework for
                     segmentation of Bangla handwritten word images into
                     meaningful individual symbols or pseudo-characters.
                     Existing segmentation algorithms are not usually
                     treated as a classification problem. However, in the
                     present study, the segmentation algorithm is looked
                     upon as a two-class supervised classification problem.
                     The method employs an SVM classifier to select the
                     segmentation points on the word image on the basis of
                     various structural features. For training of the SVM
                     classifier, an unannotated training set is prepared
                     first using candidate segmenting points. The training
                     set is then clustered, and each cluster is labeled
                     manually with minimal manual intervention. A
                     semi-automatic bootstrapping technique is also employed
                     to enlarge the training set from new samples. The
                     overall architecture describes a basic step toward
                     building an annotation system for the segmentation
                     problem, which has not so far been investigated. The
                     experimental results show that our segmentation method
                     is quite efficient in segmenting not only word images
                     but also handwritten texts. As a part of this work, a
                     database of Bangla handwritten word images has also
                     been developed. Considering our data collection method
                     and a statistical analysis of our lexicon set, we claim
                     that the relevant characteristics of an ideal lexicon
                     set are present in our handwritten word image
                     database.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}
@Article{Singh:2016:OHG,
author = "Sukhdeep Singh and Anuj Sharma and Indu Chhabra",
title = "Online Handwritten {Gurmukhi} Strokes Dataset Based on
Minimal Set of Words",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2896318",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "The online handwriting data are an integral part of
data analysis and classification research, as collected
handwritten data offers many challenges to group
handwritten stroke classes. The present work has been
done for grouping handwritten strokes from the Indic
script Gurmukhi. Gurmukhi is the script of the popular
and widely spoken language Punjabi. The present work
includes development of the dataset of Gurmukhi words
in the context of online handwriting recognition for
real-life use applications, such as maps navigation. We
have collected the data of 100 writers from the largest
cities in the Punjab region. The writers' variations,
such as writing skill level (beginner, moderate, and
expert), gender, right or left handedness, and their
adaptability to digital handwriting, have been
considered in dataset development. We have introduced a
novel technique to form handwritten stroke classes
based on a limited set of words. The presence of all
alphabets including vowels of Gurmukhi script has been
considered before selection of a word. The developed
dataset includes 39,411 strokes from handwritten words
and forms 72 classes of strokes after using a k-means
clustering technique and manual verification through
expert and moderate writers. We have achieved
recognition results using the Hidden Markov Model as
87.10\%, 85.43\%, and 84.33\% for middle zone strokes
when using training data as 66\%, 50\%, and 80\% of the
developed dataset. The present work is a step in a
direction to find groups for unknown handwriting
strokes with reasonably higher levels of accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{El-Fiqi:2016:PCC,
author = "Heba El-Fiqi and Eleni Petraki and Hussein A. Abbass",
title = "Pairwise Comparative Classification for Translator
Stylometric Analysis",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2898997",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In this article, we present a new type of
classification problem, which we call Comparative
Classification Problem (CCP), where we use the term
data record to refer to a block of instances. Given a
single data record with n instances for n classes, the
CCP problem is to map each instance to a unique class.
This problem occurs in a wide range of applications
where the independent and identically distributed
assumption is broken down. The primary difference
between CCP and classical classification is that in the
latter, the assignment of a translator to one record is
independent of the assignment of a translator to a
different record. In CCP, however, the assignment of a
translator to one record within a block excludes this
translator from further assignments to any other record
in that block. The interdependency in the data poses
challenges for techniques relying on the independent
and identically distributed (iid) assumption. In the
Pairwise CCP (PWCCP), a pair of records is grouped
together. The key difference between PWCCP and
classical binary classification problems is that hidden
patterns can only be unmasked by comparing the
instances as pairs. In this article, we introduce a new
algorithm, PWC4.5, which is based on C4.5, to manage
PWCCP. We first show that a simple transformation---that
we call Gradient-Based Transformation (GBT)---can fix the
problem of iid in C4.5. We then evaluate PWC4.5 using
two real-world corpora to distinguish between
translators on Arabic-English and French-English
translations. While the traditional C4.5 failed to
distinguish between different translators, GBT
demonstrated better performance. Meanwhile, PWC4.5
consistently provided the best results over C4.5 and
GBT.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "2",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Qiao:2016:IUD,
author = "Xiuming Qiao and Hailong Cao and Tiejun Zhao",
title = "Improving Unsupervised Dependency Parsing with
Knowledge from Query Logs",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903720",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Unsupervised dependency parsing becomes more and more
popular in recent years because it does not need
expensive annotations, such as treebanks, which are
required for supervised and semi-supervised dependency
parsing. However, its accuracy is still far below that
of supervised dependency parsers, partly due to the
fact that their parsing model is insufficient to
capture linguistic phenomena underlying texts. The
performance for unsupervised dependency parsing can be
improved by mining knowledge from the texts and by
incorporating it into the model. In this article,
syntactic knowledge is acquired from query logs to help
estimate better probabilities in dependency models with
valence. The proposed method is language independent
and obtains an improvement of 4.1\% unlabeled accuracy
on the Penn Chinese Treebank by utilizing additional
dependency relations from the Sogou query logs and
Baidu query logs. Moreover, experiments show that the
proposed model achieves improvements of 8.07\% on CoNLL
2007 English using the AOL query logs. We believe query
logs are useful sources of syntactic knowledge for many
natural language processing (NLP) tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "3",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Passban:2016:BNP,
author = "Peyman Passban and Qun Liu and Andy Way",
title = "Boosting Neural {POS} Tagger for {Farsi} Using
Morphological Information",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2934676",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Farsi (Persian) is a low-resource language that
suffers from the data sparsity problem and a lack of
efficient processing tools. Due to their broad
application in natural language processing tasks,
part-of-speech (POS) taggers are one of those important
tools that should be considered in this respect.
Despite recent work on Farsi tagging, there is still
room for improvement. The best reported accuracy so far
is 96\%, which in special cases can rise to 96.9\%. The
main problem with existing taggers is their
inefficiency in coping with out-of-vocabulary (OOV)
words. Addressing both problems of accuracy and OOV
words, we developed a neural network-based POS tagger
(NPT) that performs efficiently on Farsi. Despite using
less data, NPT provides better results in comparison to
state-of-the-art systems. Our proposed tagger performs
with an accuracy of 97.4\%, with performance highly
influenced by morphological features. We carry out a
shallow morphological analysis and show considerable
improvement over the baseline configuration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "4",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2016:SBM,
author = "Liangliang Liu and Cungen Cao",
title = "A Seed-Based Method for Generating {Chinese} Confusion
Sets",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2933396",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In natural language, people often misuse a word
(called a ``confused word'') in place of other words
(called ``confusing words''). In misspelling
corrections, many approaches to finding and correcting
misspelling errors are based on a simple notion called
a ``confusion set.'' The confusion set of a confused
word consists of confusing words. In this article, we
propose a new method of building Chinese character
confusion sets. Our method is composed of two major
phases. In the first phase, we build a list of seed
confusion sets for each Chinese character, which is
based on measuring similarity in character pinyin or
similarity in character shape. In this phase, all
confusion sets are constructed manually, and the
confusion sets are organized into a graph, called a
``seed confusion graph'' (SCG), in which vertices
denote characters and edges are pairs of characters in
the form (confused character, confusing character). In
the second phase, we extend the SCG by acquiring more
pairs of (confused character, confusing character) from
a large Chinese corpus. For this, we use several word
patterns (or patterns) to generate new confusion pairs
and then verify the pairs before adding them into a
SCG. Comprehensive experiments show that our method of
extending confusion sets is effective. Also, we shall
use the confusion sets in Chinese misspelling
corrections to show the utility of our method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "5",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2016:ISP,
author = "Junhui Li and Muhua Zhu and Wei Lu and Guodong Zhou",
title = "Improving Semantic Parsing with Enriched Synchronous
Context-Free Grammars in Statistical Machine
Translation",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "6:1--6:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2963099",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Semantic parsing maps a sentence in natural language
into a structured meaning representation. Previous
studies show that semantic parsing with synchronous
context-free grammars (SCFGs) achieves favorable
performance over most other alternatives. Motivated by
the observation that the performance of semantic
parsing with SCFGs is closely tied to the translation
rules, this article explores to extend translation
rules with high quality and increased coverage in three
ways. First, we examine the difference between word
alignments for semantic parsing and statistical machine
translation (SMT) to better adapt word alignment in SMT
to semantic parsing. Second, we introduce both
structure and syntax informed nonterminals, better
guiding the parsing in favor of well-formed structure,
instead of using an uninformed nonterminal in SCFGs.
Third, we address the unknown word translation issue
via synthetic translation rules. Last but not least, we
use a filtering approach to improve performance via
predicting answer type. Evaluation on the standard
GeoQuery benchmark dataset shows that our approach
greatly outperforms the state of the art across various
languages, including English, Chinese, Thai, German,
and Greek.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "6",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Krishnamurthi:2016:UDS,
author = "Karthik Krishnamurthi and Vijayapal Reddy Panuganti
and Vishnu Vardhan Bulusu",
title = "Understanding Document Semantics from Summaries: a
Case Study on {Hindi} Texts",
journal = j-TALLIP,
volume = "16",
number = "1",
pages = "7:1--7:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2956236",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:51 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Summary of a document contains words that actually
contribute to the semantics of the document. Latent
Semantic Analysis (LSA) is a mathematical model that is
used to understand document semantics by deriving a
semantic structure based on patterns of word
correlations in the document. When using LSA to capture
semantics from summaries, it is observed that LSA
performs quite well despite being completely
independent of any external sources of semantics.
However, LSA can be remodeled to enhance its capability
to analyze correlations within texts. By taking
advantage of the model being language independent, this
article presents two stages of LSA remodeling to
understand document semantics in the Indian context,
specifically from Hindi text summaries. One stage of
remodeling is done by providing supplementary
information, such as document category and domain
information. The second stage of remodeling is done by
using a supervised term weighting measure in the
process. The remodeled LSA's performance is empirically
evaluated in a document classification application by
comparing the accuracies of classification to plain
LSA. An improvement in the performance of LSA in the
range of 4.7\% to 6.2\% is achieved from the remodel
when compared to the plain model. The results suggest
that summaries of documents efficiently capture the
semantic structure of documents and are an alternative
to full-length documents for understanding document
semantics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "7",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tursun:2016:STT,
author = "Eziz Tursun and Debasis Ganguly and Turghun Osman and
Ya-Ting Yang and Ghalip Abdukerim and Jun-Lin Zhou and
Qun Liu",
title = "A Semisupervised Tag-Transition-Based {Markovian}
Model for {Uyghur} Morphology Analysis",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "8:1--8:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2968410",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Morphological analysis, which includes analysis of
part-of-speech (POS) tagging, stemming, and morpheme
segmentation, is one of the key components in natural
language processing (NLP), particularly for
agglutinative languages. In this article, we
investigate the morphological analysis of the Uyghur
language, which is the native language of the people in
the Xinjiang Uyghur autonomous region of western China.
Morphological analysis of Uyghur is challenging
primarily because of factors such as (1) ambiguities
arising due to the likelihood of association of a
multiple number of POS tags with a word stem or a
multiple number of functional tags with a word suffix,
(2) ambiguous morpheme boundaries, and (3) complex
morphophonology of the language. Further, the
unavailability of a manually annotated training set in
the Uyghur language for the purpose of word
segmentation makes Uyghur morphological analysis more
difficult. In our proposed work, we address these
challenges by undertaking a semisupervised approach of
learning a Markov model with the help of a manually
constructed dictionary of ``suffix to tag'' mappings in
order to predict the most likely tag transitions in the
Uyghur morpheme sequence. Due to the linguistic
characteristics of Uyghur, we incorporate a prior
belief in our model for favoring word segmentations
with a lower number of morpheme units. Empirical
evaluation of our proposed model shows an accuracy of
about 82\%. We further improve the effectiveness of the
tag transition model with an active learning paradigm.
In particular, we manually investigated a subset of
words for which the model prediction ambiguity was
within the top 20\%. Manually incorporating rules to
handle these erroneous cases resulted in an overall
accuracy of 93.81\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "8",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Nguyen:2016:ACN,
author = "Long H. B. Nguyen and Dien Dinh and Phuoc Tran",
title = "An Approach to Construct a Named Entity Annotated
{English--Vietnamese} Bilingual Corpus",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "9:1--9:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2990191",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Manually constructing an annotated Named Entity (NE)
in a bilingual corpus is a time-consuming,
labor-intensive, and expensive process, but this is
necessary for natural language processing (NLP) tasks
such as cross-lingual information retrieval,
cross-lingual information extraction, machine
translation, etc. In this article, we present an
automatic approach to construct an annotated NE in
English-Vietnamese bilingual corpus from a bilingual
parallel corpus by proposing an aligned NE method.
Basing this corpus on a bilingual corpus in which the
initial NEs are extracted from its own language
separately, the approach tries to correct unrecognized
NEs or incorrectly recognized NEs before aligning the
NEs by using a variety of bilingual constraints. The
generated corpus not only improves the NE recognition
results but also creates alignments between English NEs
and Vietnamese NEs, which are necessary for training NE
translation models. The experimental results show that
the approach outperforms the baseline methods
effectively. In the English-Vietnamese NE alignment
task, the F-measure increases from 68.58\% to 79.77\%.
Thanks to the improvement of the NE recognition
quality, the proposed method also increases
significantly: the F-measure goes from 84.85\% to
88.66\% for the English side and from 75.71\% to
85.55\% for the Vietnamese side. By providing the
additional semantic information for the machine
translation systems, the BLEU score increases from
33.04\% to 45.11\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "9",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Chou:2016:BWN,
author = "Chien-Lung Chou and Chia-Hui Chang and Ya-Yun Huang",
title = "Boosted {Web} Named Entity Recognition via
Tri-Training",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "10:1--10:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2963100",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Named entity extraction is a fundamental task for many
natural language processing applications on the web.
Existing studies rely on annotated training data, which
is quite expensive to obtain large datasets, limiting
the effectiveness of recognition. In this research, we
propose a semisupervised learning approach for web
named entity recognition (NER) model construction via
automatic labeling and tri-training. The former
utilizes structured resources containing known named
entities for automatic labeling, while the latter makes
use of unlabeled examples to improve the extraction
performance. Since this automatically labeled training
data may contain noise, a self-testing procedure is
used as a follow-up to remove low-confidence annotation
and prepare higher-quality training data. Furthermore,
we modify tri-training for sequence labeling and derive
a proper initialization for large dataset training to
improve entity recognition. Finally, we apply this
semisupervised learning framework for person name
recognition, business organization name recognition,
and location name extraction. In the task of Chinese
NER, an F-measure of 0.911, 0.849, and 0.845 can be
achieved, for person, business organization, and
location NER, respectively. The same framework is also
applied for English and Japanese business organization
name recognition and obtains models with performance of
a 0.832 and 0.803 F-measure.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "10",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sadek:2016:DBA,
author = "Jawad Sadek and Farid Meziane",
title = "A Discourse-Based Approach for {Arabic} Question
Answering",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "11:1--11:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2988238",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "The treatment of complex questions with explanatory
answers involves searching for arguments in texts.
Because of the prominent role that discourse relations
play in reflecting text producers' intentions,
capturing the underlying structure of text constitutes
a good instructor in this issue. From our extensive
review, a system for automatic discourse analysis that
creates full rhetorical structures in large-scale
Arabic texts is currently unavailable. This is due to
the high computational complexity involved in
processing a large number of hypothesized relations
associated with large texts. Therefore, more practical
approaches should be investigated. This article
presents a new Arabic Text Parser oriented for
question-answering systems dealing with [Arabic
characters] ``why'' and [Arabic characters] ``how to''
questions. The Text Parser presented here considers the
sentence as the basic unit of text and incorporates a
set of heuristics to avoid computational explosion.
With this approach, the developed question-answering
system reached a significant improvement over the
baseline with a Recall of 68\% and MRR of 0.62.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "11",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tran:2016:WRS,
author = "Phuoc Tran and Dien Dinh and Long H. B. Nguyen",
title = "Word Re-Segmentation in {Chinese--Vietnamese} Machine
Translation",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "12:1--12:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2988237",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In isolated languages, such as Chinese and Vietnamese,
words are not separated by spaces, and a word may be
formed by one or more syllables. Therefore, word
segmentation (WS) is usually the first process that is
implemented in the machine translation process. WS in
the source and target languages is based on different
training corpora, and WS approaches may not be the
same. Therefore, the WS results in these two
languages are not often homologous, and thus word
alignment results in many 1-n and n-1 alignment pairs
in statistical machine translation, which degrades the
performance of machine translation. In this article, we
will adjust the WS for both Chinese and Vietnamese in
particular and for isolated language pairs in general
and make the word boundary of the two languages more
symmetric in order to strengthen 1-1 alignments and
enhance machine translation performance. We have tested
this method on the Computational Linguistics Center's
corpus, which consists of 35,623 sentence pairs. The
experimental results show that our method has
significantly improved the performance of machine
translation compared to the baseline translation
system, WS translation system, and anchor
language-based WS translation systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "12",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2016:MSC,
author = "Peifeng Li and Guodong Zhou and Qiaoming Zhu",
title = "Minimally Supervised {Chinese} Event Extraction from
Multiple Views",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "13:1--13:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2994600",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Although several semi-supervised learning models have
been proposed for English event extraction, there are
few successful stories in Chinese due to its special
characteristics. In this article, we propose a novel
minimally supervised model for Chinese event extraction
from multiple views. Besides the traditional pattern
similarity view (PSV), a semantic relationship view
(SRV) is introduced to capture the relevant event
mentions from relevant documents. Moreover, a
morphological structure view (MSV) is incorporated to
both infer more positive patterns and help filter
negative patterns via morphological structure
similarity. An evaluation of the ACE 2005 Chinese
corpus shows that our minimally supervised model
significantly outperforms several strong baselines.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "13",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Atreya:2016:QER,
author = "Arjun {Atreya V} and Ashish Kankaria and Pushpak
Bhattacharyya and Ganesh Ramakrishnan",
title = "Query Expansion in Resource-Scarce Languages: a
Multilingual Framework Utilizing Document Structure",
journal = j-TALLIP,
volume = "16",
number = "2",
pages = "14:1--14:??",
month = dec,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2997643",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Retrievals in response to queries to search engines in
resource-scarce languages often produce no results,
which annoys the user. In such cases, at least
partially relevant documents must be retrieved. We
propose a novel multilingual framework, MultiStructPRF,
which expands the query with related terms by (i) using
a resource-rich assisting language and (ii) giving
varied importance to the expansion terms depending on
their position of occurrence in the document. Our
system uses the help of an assisting language to expand
the query in order to improve system recall. We propose
a systematic expansion model for weighting the
expansion terms coming from different parts of the
document. To combine the expansion terms from query
language and assisting language, we propose a
heuristics-based fusion model. Our experimental results
show an improvement over other PRF techniques in both
precision and recall for multiple resource-scarce
languages like Marathi, Bengali, Odia, Finnish, and the
like. We study the effect of different assisting
languages on precision and recall for multiple query
languages. Our experiments reveal an interesting fact:
Precision is positively correlated with the typological
closeness of query language and assisting language,
whereas recall is positively correlated with the
resource richness of the assisting language.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "14",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Finch:2017:IBL,
author = "Andrew Finch and Taisuke Harada and Kumiko
Tanaka-Ishii and Eiichiro Sumita",
title = "Inducing a Bilingual Lexicon from Short Parallel
Multiword Sequences",
journal = j-TALLIP,
volume = "16",
number = "3",
pages = "15:1--15:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3003726",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article proposes a technique for mining bilingual
lexicons from pairs of parallel short word sequences.
The technique builds a generative model from a corpus
of training data consisting of such pairs. The model is
a hierarchical nonparametric Bayesian model that
directly induces a bilingual lexicon while training.
The model learns in an unsupervised manner and is
designed to exploit characteristics of the language
pairs being mined. The proposed model is capable of
utilizing commonly used word-pair frequency information
and additionally can employ the internal character
alignments within the words themselves. It is thereby
capable of mining transliterations and can use reliably
aligned transliteration pairs to support the mining of
other words in their context. The model is also capable
of performing word reordering and word deletion during
the alignment process, and it is furthermore capable of
operating in the absence of full segmentation
information. In this work, we study two mining tasks
based on English--Japanese and English--Chinese language
pairs, and compare the proposed approach to baselines
based on simpler models that use only word-pair
frequency information. Our results show that the
proposed method is able to mine bilingual word pairs at
higher levels of precision and recall than the
baselines.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "15",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2017:CSC,
  author = {Wang, Shaonan and Zong, Chengqing},
  title = {Comparison Study on Critical Components in Composition
    Model for Phrase Representation},
  journal = j-TALLIP,
  volume = {16},
  number = {3},
  pages = {16:1--16:??},
  month = mar,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3010088},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:52 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Phrase representation, an important step in many NLP
    tasks, involves representing phrases as
    continuous-valued vectors. This article presents
    detailed comparisons concerning the effects of word
    vectors, training data, and the composition and
    objective function used in a composition model for
    phrase representation. Specifically, we first discuss
    how the augmented word representations affect the
    performance of the composition model. Then, we
    investigate whether different types of training data
    influence the performance of the composition model and,
    if so, how they influence it. Finally, we evaluate
    combinations of different composition and objective
    functions and discuss the factors related to
    composition model performance. All evaluations were
    conducted in both English and Chinese. Our main
    findings are as follows: (1) The Additive model with
    semantic enhanced word vectors performs comparably to
    the state-of-the-art model; (2) The Additive model
    which updates augmented word vectors and the Matrix
    model with semantic enhanced word vectors
    systematically outperforms the state-of-the-art model
    in bigram and multi-word phrase similarity task,
    respectively; (3) Representing the high frequency
    phrases by estimating their surrounding contexts is a
    good training objective for bigram phrase similarity
    tasks; and (4) The performance gain of composition
    model with semantic enhanced word vectors is due to the
    composition function and the greater weight attached to
    important words. Previous works focus on the
    composition function; however, our findings indicate
    that other components in the composition model
    (especially word representation) make a critical
    difference in phrase representation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {16},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Bhat:2017:ITB,
author = "Riyaz Ahmad Bhat and Irshad Ahmad Bhat and Dipti Misra
Sharma",
title = "Improving Transition-Based Dependency Parsing of
{Hindi} and {Urdu} by Modeling Syntactically Relevant
Phenomena",
journal = j-TALLIP,
volume = "16",
number = "3",
pages = "17:1--17:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3005447",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In recent years, transition-based parsers have shown
promise in terms of efficiency and accuracy. Though
these parsers have been extensively explored for
multiple Indian languages, there is still considerable
scope for improvement by properly incorporating
syntactically relevant information. In this article, we
enhance transition-based parsing of Hindi and Urdu by
redefining the features and feature extraction
procedures that have been previously proposed in the
parsing literature of Indian languages. We propose and
empirically show that properly incorporating
syntactically relevant information like case marking,
complex predication and grammatical agreement in an
arc-eager parsing model can significantly improve
parsing accuracy. Our experiments show an absolute
improvement of $ \approx 2 $\% LAS for parsing of both
Hindi and Urdu over a competitive baseline which uses
rich features like part-of-speech (POS) tags, chunk
tags, cluster ids and lemmas. We also propose some
heuristics to identify ezafe constructions in Urdu
texts which show promising results in parsing these
constructions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Das:2017:NER,
  author = {Das, Arjun and Ganguly, Debasis and Garain, Utpal},
  title = {Named Entity Recognition with Word Embeddings and
    {Wikipedia} Categories for a Low-Resource Language},
  journal = j-TALLIP,
  volume = {16},
  number = {3},
  pages = {18:1--18:??},
  month = mar,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3015467},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:52 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {In this article, we propose a word embedding--based
    named entity recognition (NER) approach. NER is
    commonly approached as a sequence labeling task with
    the application of methods such as conditional random
    field (CRF). However, for low-resource languages
    without the presence of sufficiently large training
    data, methods such as CRF do not perform well. In our
    work, we make use of the proximity of the vector
    embeddings of words to approach the NER problem. The
    hypothesis is that word vectors belonging to the same
    name category, such as a person's name, occur in close
    vicinity in the abstract vector space of the embedded
    words. Assuming that this clustering hypothesis is
    true, we apply a standard classification approach on
    the vectors of words to learn a decision boundary
    between the NER classes. Our NER experiments are
    conducted on a morphologically rich and low-resource
    language, namely Bengali. Our approach significantly
    outperforms standard baseline CRF approaches that use
    cluster labels of word embeddings and gazetteers
    constructed from Wikipedia. Further, we propose an
    unsupervised approach (that uses an automatically
    created named entity (NE) gazetteer from Wikipedia in
    the absence of training data). For a low-resource
    language, the word vectors obtained from Wikipedia are
    not sufficient to train a classifier. As a result, we
    propose to make use of the distance measure between the
    vector embeddings of words to expand the set of
    Wikipedia training examples with additional NEs
    extracted from a monolingual corpus that yield
    significant improvement in the unsupervised NER
    performance. In fact, our expansion method performs
    better than the traditional CRF-based (supervised)
    approach (i.e., F-score of 65.4\% vs. 64.2\%). Finally,
    we compare our proposed approach to the official
    submission for the IJCNLP-2008 Bengali NER shared task
    and achieve an overall improvement of F-score 11.26\%
    with respect to the best official system.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {18},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Li:2017:IDR,
  author = {Li, Haoran and Zhang, Jiajun and Zong, Chengqing},
  title = {Implicit Discourse Relation Recognition for {English}
    and {Chinese} with Multiview Modeling and Effective
    Representation Learning},
  journal = j-TALLIP,
  volume = {16},
  number = {3},
  pages = {19:1--19:??},
  month = mar,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3028772},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:52 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Discourse relations between two text segments play an
    important role in many Natural Language Processing
    (NLP) tasks. The connectives strongly indicate the
    sense of discourse relations, while in fact, there are
    no connectives in a large proportion of discourse
    relations, that is, implicit discourse relations.
    Compared with explicit relations, implicit relations
    are much harder to detect and have drawn significant
    attention. Until now, there have been many studies
    focusing on English implicit discourse relations, and
    few studies address implicit relation recognition in
    Chinese even though the implicit discourse relations in
    Chinese are more common than those in English. In our
    work, both the English and Chinese languages are our
    focus. The key to implicit relation prediction is to
    properly model the semantics of the two discourse
    arguments, as well as the contextual interaction
    between them. To achieve this goal, we propose a neural
    network based framework that consists of two
    hierarchies. The first one is the model hierarchy, in
    which we propose a max-margin learning method to
    explore the implicit discourse relation from multiple
    views. The second one is the feature hierarchy, in
    which we learn multilevel distributed representations
    from words, arguments, and syntactic structures to
    sentences. We have conducted experiments on the
    standard benchmarks of English and Chinese, and the
    results show that compared with several methods our
    proposed method can achieve the best performance in
    most cases.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {19},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Tholpadi:2017:CBT,
author = "Goutham Tholpadi and Chiranjib Bhattacharyya and
Shirish Shevade",
title = "Corpus-Based Translation Induction in {Indian}
Languages Using Auxiliary Language Corpora from
{Wikipedia}",
journal = j-TALLIP,
volume = "16",
number = "3",
pages = "20:1--20:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3038295",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Identifying translations from comparable corpora is a
well-known problem with several applications. Existing
methods rely on linguistic tools or high-quality
corpora. Absence of such resources, especially in
Indian languages, makes this problem hard; for example,
state-of-the-art techniques achieve a mean reciprocal
rank of 0.66 for English--Italian, and a mere 0.187 for
Telugu--Kannada. In this work, we address the problem of
comparable corpora-based translation correspondence
induction (CC-TCI) when the only resources available
are small noisy comparable corpora extracted from
Wikipedia. We observe that translations in the source
and target languages have many topically related words
in common in other ``auxiliary'' languages. To model
this, we define the notion of a translingual theme, a
set of topically related words from auxiliary language
corpora, and present a probabilistic framework for
CC-TCI. Extensive experiments on 35 comparable corpora
showed dramatic improvements in performance. We extend
these ideas to propose a method for measuring
cross-lingual semantic relatedness (CLSR) between
words. To stimulate further research in this area, we
make publicly available two new high-quality
human-annotated datasets for CLSR. Experiments on the
CLSR datasets show more than 200\% improvement in
correlation on the CLSR task. We apply the method to
the real-world problem of cross-lingual Wikipedia title
suggestion and build the WikiTSu system. A user study
on WikiTSu shows a 20\% improvement in the quality of
titles suggested.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "20",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhao:2017:HMC,
author = "Hai Zhao and Deng Cai and Yang Xin and Yuzhu Wang and
Zhongye Jia",
title = "A Hybrid Model for {Chinese} Spelling Check",
journal = j-TALLIP,
volume = "16",
number = "3",
pages = "21:1--21:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047405",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Apr 3 08:15:52 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Spelling check for Chinese has more challenging
difficulties than that for other languages. A hybrid
model for Chinese spelling check is presented in this
article. The hybrid model consists of three components:
one graph-based model for generic errors and two
independently trained models for specific errors. In
the graph model, a directed acyclic graph is generated
for each sentence, and the single-source shortest-path
algorithm is performed on the graph to detect and
correct general spelling errors at the same time. Prior
to that, two types of errors over functional words
(characters) are first solved by conditional random
fields: the confusion of ``[Chinese characters]'' (at)
(pinyin is zai in Chinese), ``[Chinese characters]''
(again, more, then) (pinyin: zai) and ``[Chinese
characters]'' (of) (pinyin: de), ``[Chinese
characters]'' (-ly, adverb-forming particle) (pinyin:
de), and ``[Chinese characters]'' (so that, have to)
(pinyin: de). Finally, a rule-based model is exploited
to distinguish pronoun usage confusion: ``[Chinese
characters]'' (she) (pinyin: ta), ``[Chinese
characters]'' (he) (pinyin: ta), and some other common
collocation errors. The proposed model is evaluated on
the standard datasets released by the SIGHAN Bake-off
shared tasks, giving state-of-the-art results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "21",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wali:2017:ECL,
author = "Wafa Wali and Bilel Gargouri and Abdelmajid {Ben
Hamadou}",
title = "Evaluating the Content of {LMF} Standardized
Dictionaries: a Practical Experiment on {Arabic}
Language",
journal = j-TALLIP,
volume = "16",
number = "4",
pages = "22:1--22:??",
month = sep,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047406",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Dec 23 10:06:06 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Since the age of paper versions, dictionaries are
often published with anomalies in their content
resulting from lexicographer's mistakes or from the
lack of efficiency of automatic enrichment systems.
Many of these anomalies are expensive to manually
detect and difficult to automatically control, notably
with lightly structured models of dictionaries. In this
article, we take advantage of the fine structure
proposed by the Lexical Markup Framework (LMF) norm to
investigate the detection of anomalies in the content
of LMF normalized dictionaries. First, we give a
theoretical study on the plausible anomalies, such as
inconsistency, incoherence, redundancy, and
incompleteness. Second, we detail the approach that we
propose for the automatic detection of such anomalies.
Finally, we report on an experiment carried out on an
available normalized dictionary of the Arabic language.
The experiment has shown that the proposed approach
gives reasonable results in terms of precision and
recall.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "22",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Baly:2017:STM,
author = "Ramy Baly and Hazem Hajj and Nizar Habash and Khaled
Bashir Shaban and Wassim El-Hajj",
title = "A Sentiment {Treebank} and Morphologically Enriched
Recursive Deep Models for Effective Sentiment Analysis
in {Arabic}",
journal = j-TALLIP,
volume = "16",
number = "4",
pages = "23:1--23:??",
month = sep,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3086576",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Dec 23 10:06:06 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Accurate sentiment analysis models encode the
sentiment of words and their combinations to predict
the overall sentiment of a sentence. This task becomes
challenging when applied to morphologically rich
languages (MRL). In this article, we evaluate the use
of deep learning advances, namely the Recursive Neural
Tensor Networks (RNTN), for sentiment analysis in
Arabic as a case study of MRLs. While Arabic may not be
considered the only representative of all MRLs, the
challenges faced and proposed solutions in Arabic are
common to many other MRLs. We identify, illustrate, and
address MRL-related challenges and show how RNTN is
affected by the morphological richness and orthographic
ambiguity of the Arabic language. To address the
challenges with sentiment extraction from text in MRL,
we propose to explore different orthographic features
as well as different morphological features at multiple
levels of abstraction ranging from raw words to roots.
A key requirement for RNTN is the availability of a
sentiment treebank; a collection of syntactic parse
trees annotated for sentiment at all levels of
constituency and that currently only exists in English.
Therefore, our contribution also includes the creation
of the first Arabic Sentiment Treebank (ArSenTB) that
is morphologically and orthographically enriched.
Experimental results show that, compared to the basic
RNTN proposed for English, our solution achieves
significant improvements up to 8\% absolute at the
phrase level and 10.8\% absolute at the sentence level,
measured by average F1 score. It also outperforms
well-known classifiers including Support Vector
Machines, Recursive Auto Encoders, and Long Short-Term
Memory by 7.6\%, 3.2\%, and 1.6\% absolute
respectively, all models being trained with similar
morphological considerations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "23",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Punchimudiyanse:2017:AFW,
  author = {Punchimudiyanse, Malinda and Meegama, Ravinda Gayan
    Narendra},
  title = {Animation of Fingerspelled Words and Number Signs of
    the {Sinhala} Sign Language},
  journal = j-TALLIP,
  volume = {16},
  number = {4},
  pages = {24:1--24:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3092743},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Sign language is the primary communication medium of
    the aurally handicapped community. Often, a sign
    gesture is mapped to a word or a phrase in a spoken
    language and named as a conversational sign. A
    fingerspelling sign is a special sign derived to show a
    single character that matches a character in the
    alphabet of a given language. This enables the deaf
    community to express words that do not have a
    conversational sign, such as a name, using a
    letter-by-letter technique. Sinhala Sign Language (SSL)
    uses a phonetic pronunciation mechanism to decode such
    words due to the presence of one or more modifiers
    after a consonant. Expressing numbers also have a
    similar notation, and it is broken down into parts
    before interpretation in sign gestures. This article
    presents the variations implemented to make the 3D
    avatar-based interpreter system look similar to an
    actual fingerspelled SSL by a human interpreter. To
    accomplish the task, a phonetic English-based 3D avatar
    animation system is developed with Blender animation
    software. The conversion of Sinhala Unicode text to
    phonetic English and numbers written in digits to sign
    gestures is done with a Visual Basic.NET (VB.NET)
    application. The presented application has 61 SSL
    fingerspelling signs and 40 SSL number signs. It is
    capable of interpreting any word written using the
    modern Sinhala alphabet without conversational signs
    and interprets the numbers that go up to the billions.
    This is a helpful tool in teaching SSL fingerspelling
    and number signs of SSL to deaf children.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {24},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Al-Sallab:2017:ARD,
  author = {Al-Sallab, Ahmad and Baly, Ramy and Hajj, Hazem and
    Shaban, Khaled Bashir and El-Hajj, Wassim and
    Badaro, Gilbert},
  title = {{AROMA}: a Recursive Deep Learning Model for Opinion
    Mining in {Arabic} as a Low Resource Language},
  journal = j-TALLIP,
  volume = {16},
  number = {4},
  pages = {25:1--25:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3086575},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {While research on English opinion mining has already
    achieved significant progress and success, work on
    Arabic opinion mining is still lagging. This is mainly
    due to the relative recency of research efforts in
    developing natural language processing (NLP) methods
    for Arabic, handling its morphological complexity, and
    the lack of large-scale opinion resources for Arabic.
    To close this gap, we examine the class of models used
    for English and that do not require extensive use of
    NLP or opinion resources. In particular, we consider
    the Recursive Auto Encoder (RAE). However, RAE models
    are not as successful in Arabic as they are in English,
    due to their limitations in handling the morphological
    complexity of Arabic, providing a more complete and
    comprehensive input features for the auto encoder, and
    performing semantic composition following the natural
    way constituents are combined to express the overall
    meaning. In this article, we propose A Recursive Deep
    Learning Model for Opinion Mining in Arabic (AROMA)
    that addresses these limitations. AROMA was evaluated
    on three Arabic corpora representing different genres
    and writing styles. Results show that AROMA achieved
    significant performance improvements compared to the
    baseline RAE. It also outperformed several well-known
    approaches in the literature.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {25},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Kong:2017:CSE,
  author = {Kong, Fang and Zhou, Guodong},
  title = {A {CDT}-Styled End-to-End {Chinese} Discourse Parser},
  journal = j-TALLIP,
  volume = {16},
  number = {4},
  pages = {26:1--26:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3099557},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Discourse parsing is a challenging task and plays a
    critical role in discourse analysis. Since the release
    of the Rhetorical Structure Theory Discourse Treebank
    and the Penn Discourse Treebank, the research on
    English discourse parsing has attracted increasing
    attention and achieved considerable success in recent
    years. At the same time, some preliminary research on
    certain subtasks about discourse parsing for other
    languages, such as Chinese, has been conducted. In this
    article, we present an end-to-end Chinese discourse
    parser with the Connective-Driven Dependency Tree
    scheme, which consists of multiple components in a
    pipeline architecture, such as the elementary discourse
    unit (EDU) detector, discourse relation recognizer,
    discourse parse tree generator, and attribution
    labeler. In particular, the attribution labeler
    determines two attributions (i.e., sense and centering)
    for every nonterminal node (i.e., discourse relation)
    in the discourse parse trees. Systematically, our
    parser detects all EDUs in a free text, generates the
    discourse parse tree in a bottom-up way, and determines
    the sense and centering attributions for all
    nonterminal nodes by traversing the discourse parse
    tree. Comprehensive evaluation on the Connective-Driven
    Dependency Treebank corpus from both component-wise and
    error-cascading perspectives is conducted to illustrate
    how each component performs in isolation, and how the
    pipeline performs with error propagation. Finally, it
    shows that our end-to-end Chinese discourse parser
    achieves an overall F1 score of 20\% with full
    automation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {26},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Liu:2017:PAL,
  author = {Liu, Shih-Hung and Chen, Kuan-Yu and Hsieh, Yu-Lun and
    Chen, Berlin and Wang, Hsin-Min and Yen, Hsu-Chun and
    Hsu, Wen-Lian},
  title = {A Position-Aware Language Modeling Framework for
    Extractive Broadcast News Speech Summarization},
  journal = j-TALLIP,
  volume = {16},
  number = {4},
  pages = {27:1--27:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3099472},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Extractive summarization, a process that automatically
    picks exemplary sentences from a text (or spoken)
    document with the goal of concisely conveying key
    information therein, has seen a surge of attention from
    scholars and practitioners recently. Using a language
    modeling (LM) approach for sentence selection has been
    proven effective for performing unsupervised extractive
    summarization. However, one of the major difficulties
    facing the LM approach is to model sentences and
    estimate their parameters more accurately for each text
    (or spoken) document. We extend this line of research
    and make the following contributions in this work.
    First, we propose a position-aware language modeling
    framework using various granularities of
    position-specific information to better estimate the
    sentence models involved in the summarization process.
    Second, we explore disparate ways to integrate the
    positional cues into relevance models through a
    pseudo-relevance feedback procedure. Third, we
    extensively evaluate various models originated from our
    proposed framework and several well-established
    unsupervised methods. Empirical evaluation conducted on
    a broadcast news summarization task further
    demonstrates performance merits of the proposed
    summarization methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {27},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Phani:2017:SLA,
  author = {Phani, Shanta and Lahiri, Shibamouli and Biswas,
    Arindam},
  title = {A Supervised Learning Approach for Authorship
    Attribution of {Bengali} Literary Texts},
  journal = j-TALLIP,
  volume = {16},
  number = {4},
  pages = {28:1--28:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3099473},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Authorship Attribution is a long-standing problem in
    Natural Language Processing. Several statistical and
    computational methods have been used to find a solution
    to this problem. In this article, we have proposed
    methods to deal with the authorship attribution problem
    in Bengali. More specifically, we proposed a supervised
    framework consisting of lexical and shallow features
    and investigated the possibility of using
    topic-modeling-inspired features, to classify documents
    according to their authors. We have created a corpus
    from nearly all the literary works of three eminent
    Bengali authors, consisting of 3,000 disjoint samples.
    Our models showed better performance than the
    state-of-the-art, with more than 98\% test accuracy for
    the shallow features and 100\% test accuracy for the
    topic-based features. Further experiments with GloVe
    vectors [Pennington et al. 2014] showed comparable
    results, but flexible patterns based on content words
    and high-frequency words [Schwartz et al. 2013] failed
    to perform as well as expected.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {28},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Passban:2017:TLR,
author = "Peyman Passban and Qun Liu and Andy Way",
title = "Translating Low-Resource Languages by Vocabulary
Adaptation from Close Counterparts",
journal = j-TALLIP,
volume = "16",
number = "4",
pages = "29:1--29:??",
month = sep,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3099556",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Dec 23 10:06:06 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Some natural languages belong to the same family or
share similar syntactic and/or semantic regularities.
This property persuades researchers to share
computational models across languages and benefit from
high-quality models to boost existing low-performance
counterparts. In this article, we follow a similar
idea, whereby we develop statistical and neural machine
translation (MT) engines that are trained on one
language pair but are used to translate another
language. First we train a reliable model for a
high-resource language, and then we exploit
cross-lingual similarities and adapt the model to work
for a close language with almost zero resources. We
chose Turkish (Tr) and Azeri or Azerbaijani (Az) as the
proposed pair in our experiments. Azeri suffers from
lack of resources as there is almost no bilingual
corpus for this language. Via our techniques, we are
able to train an engine for the Az $ \rightarrow $ English (En)
direction, which is able to outperform all other
existing models.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "29",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{S:2017:RMI,
  author = {S., Sreelekha and Bhattacharyya, Pushpak},
  title = {Role of Morphology Injection in {SMT}: a Case Study
    from {Indian} Language Perspective},
  journal = j-TALLIP,
  volume = {17},
  number = {1},
  pages = {1:1--1:??},
  month = nov,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3129208},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Phrase-based Statistical Machine Translation (PBSMT)
    is commonly used for automatic translation. However,
    PBSMT runs into difficulty when either or both of the
    source and target languages are morphologically rich.
    Factored models are found to be useful for such cases,
    as they consider word as a vector of factors. These
    factors can contain any information about the surface
    word and use it while translating. The objective of the
    current work is to handle morphological inflections in
    Hindi, Marathi, and Malayalam using Factored
    translation models when translating from English.
    Statistical MT approaches face the problem of data
    sparsity when translating to a morphologically rich
    language. It is very unlikely for a parallel corpus to
    contain all morphological forms of words. We propose a
    solution to generate these unseen morphological forms
    and inject them into the original training corpus. We
    propose a simple and effective solution based on
    enriching the input with various morphological forms of
    words. We observe that morphology injection improves
    the quality of translation in terms of both adequacy
    and fluency. We verify this with experiments on three
    morphologically rich languages when translating from
    English. From the detailed evaluations, we observed an
    order of magnitude improvement in translation
    quality.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {1},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Malik:2017:UNE,
  author =       "Muhammad Kamran Malik",
  title =        "{Urdu} Named Entity Recognition and Classification
                 System Using Artificial Neural Network",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "2:1--2:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3129290",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Named Entity Recognition and Classification (NERC) is
                 a process of identifying words and classifying them
                 into person names, location names, organization names,
                 and so on. In this article, we discuss the development
                 of an Urdu Named Entity (NE) corpus, called the
                 Kamran-PU-NE (KPU-NE) corpus, for three entity types,
                 that is, Person, Organization, and Location, and
                 marking the remaining tokens as Others (O). We use two
                 supervised learning algorithms, Hidden Markov Model
                 (HMM) and Artificial Neural Network (ANN), for the
                 development of the Urdu NERC system. We annotate the
                 652852-token corpus taken from 15 different genres with
                 a total of 44480 NEs. The inter-annotator agreement
                 between the two annotators in terms of Kappa k
                 statistic is 73.41\%. With HMM, the highest recorded
                 precision, recall, and f-measure values are 55.98\%,
                 83.11\%, and 66.90\%, respectively, and with ANN, they
                 are 81.05\%, 87.54\%, and 84.17\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kim:2017:PEN,
  author =       "Hyun Kim and Hun-Young Jung and Hongseok Kwon and
                 Jong-Hyeok Lee and Seung-Hoon Na",
  title =        "Predictor--Estimator: Neural Quality Estimation Based
                 on Target Word Prediction for Machine Translation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "3:1--3:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3109480",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Recently, quality estimation has been attracting
                 increasing interest from machine translation
                 researchers, aiming at finding a good estimator for the
                 ``quality'' of machine translation output. The common
                 approach for quality estimation is to treat the problem
                 as a supervised regression/classification task using a
                 quality-annotated noisy parallel corpus, called quality
                 estimation data, as training data. However, the
                 available size of quality estimation data remains
                 small, due to the too-expensive cost of creating such
                 data. In addition, most conventional quality estimation
                 approaches rely on manually designed features to model
                 nonlinear relationships between feature vectors and
                 corresponding quality labels. To overcome these
                 problems, this article proposes a novel neural network
                 architecture for quality estimation task---called the
                 predictor-estimator---that considers word prediction as
                 an additional pre-task. The major component of the
                 proposed neural architecture is a word prediction model
                 based on a modified neural machine translation model---a
                 probabilistic model for predicting a target word
                 conditioned on all the other source and target
                 contexts. The underlying assumption is that the word
                 prediction model is highly related to quality
                 estimation models and is therefore able to transfer
                 useful knowledge to quality estimation tasks. Our
                 proposed quality estimation method sequentially trains
                 the following two types of neural models: (1)
                 Predictor: a neural word prediction model trained from
                 parallel corpora and (2) Estimator: a neural quality
                 estimation model trained from quality estimation data.
                 To transfer a word prediction task to a quality
                 estimation task, we generate quality estimation feature
                 vectors from the word prediction model and feed them
                 into the quality estimation model. The experimental
                 results on WMT15 and 16 quality estimation datasets
                 show that our proposed method has great potential in
                 the various sub-challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Almeman:2017:ABV,
  author =       "Khalid Almeman",
  title =        "Automatically Building {VoIP} Speech Parallel Corpora
                 for {Arabic} Dialects",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "4:1--4:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3132708",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article discusses the process of automatically
                 building Arabic multi-dialect speech corpora using
                 Voice over Internet Protocol (VoIP). The Asterisk
                 framework was adopted to act as the main connection
                 between the parties, for which two virtual machines
                 were created: a sender and a receiver. The sender makes
                 a VoIP call to the receiver using the Asterisk
                 framework, while the receiver records the call
                 automatically, a process that is repeated for all the
                 audio files involved in the corpora. In this work, more
                 than 67,000 automatic calls were made between the
                 sender and receiver machines, generating VoIP Arabic
                 corpora for four Arabic dialects. The resulting corpora
                 can be considered the first Arabic VoIP parallel speech
                 corpora and will be made freely available to
                 researchers in Arabic NLP and speech recognition
                 research.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Tran:2017:LRB,
  author =       "Phuoc Tran and Dien Dinh and Tan Le and Long H. B.
                 Nguyen",
  title =        "Linguistic-Relationships-Based Approach for Improving
                 Word Alignment",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "5:1--5:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3133323",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The unsupervised word alignments (such as GIZA++) are
                 widely used in the phrase-based statistical machine
                 translation. The quality of the model is proportional
                 to the size and the quality of the bilingual corpus.
                 However, for low-resource language pairs such as
                 Chinese and Vietnamese, a result of unsupervised word
                 alignment sometimes is of low quality due to the sparse
                 data. In addition, this model does not take advantage
                 of the linguistic relationships to improve performance
                 of word alignment. Chinese and Vietnamese have the same
                 language type and have close linguistic relationships.
                 In this article, we integrate the characteristics of
                 linguistic relationships into the word alignment model
                 to enhance the quality of Chinese-Vietnamese word
                 alignment. These linguistic relationships are
                 Sino-Vietnamese and content word. The experimental
                 results showed that our method improved the performance
                 of word alignment as well as the quality of machine
                 translation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Cheng:2017:ECC,
  author =       "Xiyao Cheng and Ying Chen and Bixiao Cheng and
                 Shoushan Li and Guodong Zhou",
  title =        "An Emotion Cause Corpus for {Chinese} Microblogs with
                 Multiple-User Structures",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "6:1--6:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3132684",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A notably challenging problem in emotion analysis is
                 recognizing the cause of an emotion. Although there
                 have been a few studies on emotion cause detection,
                 most of them work on news reports or a few of them
                 focus on microblogs using a single-user structure
                 (i.e., all texts in a microblog are written by the same
                 user). In this article, we focus on emotion cause
                 detection for Chinese microblogs using a multiple-user
                 structure (i.e., texts in a microblog are successively
                 written by several users). First, based on the fact
                 that the causes of an emotion of a focused user may be
                 provided by other users in a microblog with the
                 multiple-user structure, we design an emotion cause
                 annotation scheme which can deal with such a
                 complicated case, and then provide an emotion cause
                 corpus using the annotation scheme. Second, based on
                 the analysis of the emotion cause corpus, we formalize
                 two emotion cause detection tasks for microblogs
                 (current-subtweet-based emotion cause detection and
                 original-subtweet-based emotion cause detection).
                 Furthermore, in order to examine the difficulty of the
                 two emotion cause detection tasks and the contributions
                 of texts written by different users in a microblog with
                 the multiple-user structure, we choose two popular
                 classification methods (SVM and LSTM) to do emotion
                 cause detection. Our experiments show that the
                 current-subtweet-based emotion cause detection is much
                 more difficult than the original-subtweet-based emotion
                 cause detection, and texts written by different users
                 are very helpful for both emotion cause detection
                 tasks. This study presents a pilot study of emotion
                 cause detection which deals with Chinese microblogs
                 using a complicated structure.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Sarma:2017:DAS,
  author =       "Himangshu Sarma and Navanath Saharia and Utpal
                 Sharma",
  title =        "Development and Analysis of Speech Recognition Systems
                 for {Assamese} Language Using {HTK}",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "7:1--7:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3137055",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Language analysis is very important for the native
                 speaker to connect with the digital world. Assamese is
                 a relatively unexplored language. In this report, we
                 analyze different aspects of speech-to-text processing,
                 starting from building a speech corpus, defining
                 syllable rules, and finally developing a speech search
                 engine of Assamese. We have collected about 20 hours of
                 speech in three (viz., read, extempore, and
                 conversation) modes and transcribed it. We also discuss
                 some issues and challenges faced during development of
                 the corpus. We have developed an automatic
                 syllabification model with 11 rules for the Assamese
                 language and found an accuracy of more than 95\% in our
                 result. We found 12 different syllable patterns where 5
                 are found most frequent. The maximum length of a
                 syllable found is four letters. With the help of Hidden
                 Markov Model Toolkit (HTK) 3.5, we used deep learning
                 based neural network for our speech recognition model,
                 where we obtained 78.05\% accuracy for automatic
                 transcription of Assamese speech.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Bhattacharya:2017:COB,
  author =       "Nilanjana Bhattacharya and Umapada Pal and Partha
                 Pratim Roy",
  title =        "Cleaning of Online {Bangla} Free-form Handwritten
                 Text",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "8:1--8:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3145538",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In the normal free-form handwritten text, repetition
                 (repeated writing of the same stroke several times in
                 the same place), over-writing, and crossing out are
                 very common. In this article, we call the presence of
                 these three types of writing as ``noise.'' Cleaning to
                 extract useful text from such types of noisy text is an
                 important task for robust recognition. To the best of
                 our knowledge, no work has been reported on cleaning of
                 such noise from online text in any scripts and hence,
                 in this article, we propose an automatic text-cleaning
                 approach for online handwriting recognition. Here, at
                 first, crossing out noise with straight strike-through
                 lines is detected using the straightness criteria of
                 online strokes. Next, regions containing repetition,
                 over-writing, and other types of crossing out are
                 located using the positional information of the
                 overlapping strokes. Stroke density, self-intersections
                 of strokes etc. are computed from the strokes of
                 located regions to predict the type of noise and this
                 type of information is used as follows for their
                 cleaning. For cleaning of crossing outs, all strokes of
                 the crossing-out region are removed. For cleaning
                 repetition and over-writing, strokes written earlier
                 are removed, keeping the latest strokes. Finally,
                 delayed strokes are properly arranged and word is
                 passed to online recognizer. Though recognition of
                 free-form handwriting is quite difficult, in this
                 attempt, we obtained up to 70.71\% improvement in
                 word-recognition accuracy after noise cleaning.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Nasution:2018:GCA,
  author =       "Arbi Haza Nasution and Yohei Murakami and Toru
                 Ishida",
  title =        "A Generalized Constraint Approach to Bilingual
                 Dictionary Induction for Low-Resource Language
                 Families",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3138815",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The lack or absence of parallel and comparable corpora
                 makes bilingual lexicon extraction a difficult task for
                 low-resource languages. The pivot language and cognate
                 recognition approaches have been proven useful for
                 inducing bilingual lexicons for such languages. We
                 propose constraint-based bilingual lexicon induction
                 for closely related languages by extending constraints
                 from the recent pivot-based induction technique and
                 further enabling multiple symmetry assumption cycle to
                 reach many more cognates in the transgraph. We further
                 identify cognate synonyms to obtain many-to-many
                 translation pairs. This article utilizes four datasets:
                 one Austronesian low-resource language and three
                 Indo-European high-resource languages. We use three
                 constraint-based methods from our previous work, the
                 Inverse Consultation method and translation pairs
                 generated from Cartesian product of input dictionaries
                 as baselines. We evaluate our result using the metrics
                 of precision, recall, and F-score. Our customizable
                 approach allows the user to conduct cross validation to
                 predict the optimal hyperparameters (cognate threshold
                 and cognate synonym threshold) with various combination
                 of heuristics and number of symmetry assumption cycles
                 to gain the highest F-score. Our proposed methods have
                 statistically significant improvement of precision and
                 F-score compared to our previous constraint-based
                 methods. The results show that our method demonstrates
                 the potential to complement other bilingual dictionary
                 creation methods like word alignment models using
                 parallel corpora for high-resource languages while well
                 handling low-resource languages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Onyenwe:2018:BLR,
  author =       "Ikechukwu E. Onyenwe and Mark Hepple and Uchechukwu
                 Chinedu and Ignatius Ezeani",
  title =        "A Basic Language Resource Kit Implementation for the
                 {Igbo} {NLP} Project",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3146387",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Igbo, an African language with around 32 million
                 speakers worldwide, is one of the many languages having
                 few or none of the language processing resources needed
                 for advanced language technology applications. In this
                 article, we describe the approach taken to creating an
                 initial set of resources for Igbo, including an
                 electronic text corpus, a part-of-speech (POS) tagset,
                 and a POS-tagged subcorpus. We discuss the approach
                 taken in gathering texts, the preprocessing of these
                 texts, and the development of the POS tagged corpus. We
                 also discuss some of the problems encountered during
                 corpus and tagset development and the solutions arrived
                 at for these problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Jia:2018:IDP,
  author =       "Yanyan Jia and Yansong Feng and Yuan Ye and Chao Lv
                 and Chongde Shi and Dongyan Zhao",
  title =        "Improved Discourse Parsing with Two-Step Neural
                 Transition-Based Model",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3152537",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Discourse parsing aims to identify structures and
                 relationships between different discourse units. Most
                 existing approaches analyze a whole discourse at once,
                 which often fails in distinguishing long-span relations
                 and properly representing discourse units. In this
                 article, we propose a novel parsing model to analyze
                 discourse in a two-step fashion with different feature
                 representations to characterize intra sentence and
                 inter sentence discourse structures, respectively. Our
                 model works in a transition-based framework and
                 benefits from a stack long short-term memory neural
                 network model. Experiments on benchmark tree banks show
                 that our method outperforms traditional 1-step parsing
                 methods in both English and Chinese.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Naili:2018:CSS,
  author =       "Marwa Naili and Anja Habacha Chaibi and Henda {Hajjami
                 Ben Ghezala}",
  title =        "The Contribution of Stemming and Semantics in {Arabic}
                 Topic Segmentation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3152464",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Topic Segmentation is one of the pillars of Natural
                 Language Processing. Yet there is a remarkable research
                 gap in this field, as far as the Arabic language is
                 concerned. The purpose of this article is to improve
                 Arabic Topic Segmentation (ATS) by inquiring into two
                 segmenters: ArabC99 and ArabTextTiling. This study is
                 carried out on two independent levels: the
                 pre-processing level and the segmentation level. These
                 levels represent the basic steps of topic segmentation.
                 On the pre-processing level, we examine the effect of
                 using different Arabic stemming algorithms on ATS. We
                 find out that Light10 is more appropriate for the
                 pre-processing step. Based on this conclusion, we
                 proceed to the second level by proposing two Arabic
                 segmenters called ArabC99-LS-LSA and
                 ArabTextTiling-LS-LSA. These latter use external
                 semantic knowledge related to the Latent Semantic
                 Analysis (LSA). Based on the evaluation results, we
                 notice that LSA provides improvements in this field.
                 Hence, the main outcome of this article emphasizes the
                 multilevel improvement of ATS based on Light10 and
                 LSA.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Fujita:2018:EPL,
  author =       "Atsushi Fujita and Pierre Isabelle",
  title =        "Expanding Paraphrase Lexicons by Exploiting
                 Generalities",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "2",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3160488",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Techniques for generating and recognizing paraphrases,
                 i.e., semantically equivalent expressions, play an
                 important role in a wide range of natural language
                 processing tasks. In the last decade, the task of
                 automatic acquisition of subsentential paraphrases,
                 i.e., words and phrases with (approximately) the same
                 meaning, has been drawing much attention in the
                 research community. The core problem is to obtain
                 paraphrases of high quality in large quantity. This
                 article presents a method for tackling this issue by
                 systematically expanding an initial seed lexicon made
                 up of high-quality paraphrases. This involves
                 automatically capturing morpho-semantic and syntactic
                 generalizations within the lexicon and using them to
                 leverage the power of large-scale monolingual data.
                 Given an input set of paraphrases, our method starts by
                 inducing paraphrase patterns that constitute
                 generalizations over corresponding pairs of lexical
                 variants, such as ``amending'' and ``amendment,'' in a
                 fully empirical way. It then searches large-scale
                 monolingual data for new paraphrases matching those
                 patterns. The results of our experiments on English,
                 French, and Japanese demonstrate that our method
                 manages to expand seed lexicons by a large multiple.
                 Human evaluation based on paraphrase substitution tests
                 reveals that the automatically acquired paraphrases are
                 also of high quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2018:EEW,
  author =       "Shaonan Wang and Jiajun Zhang and Chengqing Zong",
  title =        "Empirical Exploring Word-Character Relationship for
                 {Chinese} Sentence Representation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "3",
  pages =        "14:1--14:??",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3156778",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article addresses the problem of learning
                 compositional Chinese sentence representations, which
                 represent the meaning of a sentence by composing the
                 meanings of its constituent words. In contrast to
                 English, a Chinese word is composed of characters,
                 which contain rich semantic information. However, this
                 information has not been fully exploited by existing
                 methods. In this work, we introduce a novel, mixed
                 character-word architecture to improve the Chinese
                 sentence representations by utilizing rich semantic
                 information of inner-word characters. We propose two
                 novel strategies to reach this purpose. The first one
                 is to use a mask gate on characters, learning the
                 relation among characters in a word. The second one is
                 to use a max-pooling operation on words to adaptively
                 find the optimal mixture of the atomic and
                 compositional word representations. Finally, the
                 proposed architecture is applied to various sentence
                 composition models, which achieves substantial
                 performance gains over baseline models on sentence
                 similarity task. To further verify the generalization
                 ability of our model, we employ the learned sentence
                 representations as features in sentence classification
                 task, question classification task, and sentence
                 entailment task. Results have shown that the proposed
                 mixed character-word sentence representation models
                 outperform both the character-based and word-based
                 models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Jia:2018:COR,
  author =       "Shengbin Jia and Shijia E and Maozhen Li and Yang
                 Xiang",
  title =        "{Chinese} Open Relation Extraction and Knowledge Base
                 Establishment",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "3",
  pages =        "15:1--15:??",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162077",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Named entity relation extraction is an important
                 subject in the field of information extraction.
                 Although many English extractors have achieved
                 reasonable performance, an effective system for Chinese
                 relation extraction remains undeveloped due to the lack
                 of Chinese annotation corpora and the specificity of
                 Chinese linguistics. Here, we summarize three kinds of
                 unique but common phenomena in Chinese linguistics. In
                 this article, we investigate unsupervised
                 linguistics-based Chinese open relation extraction
                 (ORE), which can automatically discover arbitrary
                 relations without any manually labeled datasets, and
                 research the establishment of a large-scale corpus. By
                 mapping the entity relations into dependency-trees and
                 considering the unique Chinese linguistic
                 characteristics, we propose a novel unsupervised
                 Chinese ORE model based on Dependency Semantic Normal
                 Forms (DSNFs). This model imposes no restrictions on
                 the relative positions among entities and relationships
                 and achieves a high yield by extracting relations
                 mediated by verbs or nouns and processing the parallel
                 clauses. Empirical results from our model demonstrate
                 the effectiveness of this method, which obtains stable
                 performance on four heterogeneous datasets and achieves
                 better precision and recall in comparison with several
                 Chinese ORE systems. Furthermore, a large-scale
                 knowledge base of entity and relation, called COER, is
                 established and published by applying our method to web
                 text, which conquers the trouble of lack of Chinese
                 corpora.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Marie:2018:PTI,
  author =       "Benjamin Marie and Atsushi Fujita",
  title =        "Phrase Table Induction Using Monolingual Data for
                 Low-Resource Statistical Machine Translation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "3",
  pages =        "16:1--16:??",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3168054",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "We propose a new method for inducing a phrase-based
                 translation model from a pair of unrelated monolingual
                 corpora. Our method is able to deal with phrases of
                 arbitrary length and to find phrase pairs that are
                 useful for statistical machine translation, without
                 requiring large parallel or comparable corpora. First,
                 our method generates phrase pairs through coupling
                 source and target phrases separately collected from
                 respective monolingual data. Then, for each phrase
                 pair, we compute features using the monolingual data
                 and a small quantity of parallel sentences. Finally,
                 incorrect phrase pairs are pruned, and a phrase table
                 is made using the remaining phrase pairs. In our
                 experiments on French--Japanese and Spanish--Japanese
                 translation tasks under low-resource conditions, we
                 observe that incorporating a phrase table induced by
                 our method to the machine translation system leads to
                 large improvements in translation quality. Furthermore,
                 we show that a phrase table induced by our method can
                 also be useful in a wide range of configurations,
                 including configurations where we have already access
                 to large parallel corpora and configurations where only
                 small monolingual corpora are available.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Salami:2018:ISS,
author = "Shahram Salami and Mehrnoush Shamsfard",
title = "Integrating Shallow Syntactic Labels in the
Phrase-Boundary Translation Model",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "17:1--17:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178460",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Using a novel rule labeling method, this article
proposes a hierarchical model for statistical machine
translation. The proposed model labels translation
rules by matching the boundaries of target side phrases
with the shallow syntactic labels including POS tags
and chunk labels on the target side of the training
corpus. The boundary labels are concatenated if there
is no label for the whole target span. Labeling with
the classes of boundary words on the target side
phrases has been previously proposed as a
phrase-boundary model which can be considered as the
base form of our model. In the extended model, the
labeler uses a POS tag if there is no chunk label in
one boundary. Using chunks as phrase labels, the
proposed model generalizes the rules to decrease the
model sparseness. The sparseness is a more important
issue in the language pairs with a lot of differences
in the word order because they have less number of
aligned phrase pairs for extraction of rules. The
extended phrase-boundary model is also applicable for
low-resource languages having no syntactic parser. Some
experiments are performed with the proposed model, the
base phrase-boundary model, and variants of Syntax
Augmented Machine Translation (SAMT) in translation
from Persian and German to English as source and target
languages with different word orders. According to the
results, the proposed model improves the translation
performance in the quality and decoding time aspects.
Using BLEU as our metric, the proposed model has
achieved a statistically significant improvement of
about 0.5 point over the base phrase-boundary model.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sherkawi:2018:ASA,
author = "Lina Sherkawi and Nada Ghneim and Oumayma {Al
Dakkak}",
title = "{Arabic} Speech Act Recognition Techniques",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "18:1--18:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3170576",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article presents rule-based and statistical-based
techniques for Arabic speech act recognition. The
proposed techniques classify an utterance into Arabic
speech act categories based on three criteria: surface
features, cue words, and contextual information. A
rule-based expert system has been developed in a
bootstrapping manner based on the fact that Arabic
language syntax is inherently rule-based. Various
machine-learning algorithms have been used to detect
Arabic speech act categories: Decision Tree, Na{\"\i}ve
Bayes, Neural Network, and SVM. We compare the
experimental results for both techniques
(machine-learning and rule-based expert systems). Using
a corpus of 1,500 sentences, the rule-based expert
system achieved an accuracy rate of 98.92\%, while the
Decision Tree, Na{\"\i}ve Bayes, Neural Network, and
SVM achieved an accuracy rate of 97.09\%, 96.48\%,
93.50\%, and 93.70\%, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "18",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jung:2018:EEK,
author = "Sangkeun Jung and Changki Lee and Hyunsun Hwang",
title = "End-to-End {Korean} Part-of-Speech Tagging Using
Copying Mechanism",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "19:1--19:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178458",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In this article, we introduce a novel neural
architecture for the end-to-end Korean Part-of-Speech
(POS) tagging problem. To address the problem, we
extend the present recurrent neural network-based
sequence-to-sequence models to deal with the key
challenges in this task: rare word generation and POS
tagging. To overcome these issues, Input-Feeding and
Copying mechanism are adopted. Although our approach
does not require any manual features or preprocessed
pattern matching dictionaries, our best single model
achieves an F-score of 97.08. This is competitive with
the current state-of-the-art model (F-score 98.03),
which requires extensive manual feature processing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "19",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sen:2018:AST,
author = "Shibaprasad Sen and Ankan Bhattacharyya and Pawan
Kumar Singh and Ram Sarkar and Kaushik Roy and David
Doermann",
title = "Application of Structural and Topological Features to
Recognize Online Handwritten {Bangla} Characters",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "20:1--20:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178457",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article presents a set of novel features for
robust online Bangla handwritten character recognition.
Two feature extraction methods are presented here. The
first describes the transition from background to
foreground pixels and vice versa. The second uses a
combination of topological features and
centre-of-gravity- (CG) based circular features where
global information, local information, and Circular
Quadrant Mass Distribution information have been
extracted. The impact of each along with their
combination have also been analyzed. A total of 15,000
isolated online Bangla character samples have been
collected and used for the evaluation. A Support Vector
Machine classifier records the best recognition rate
when the transition count feature, CG-based circular
features, and topological features are combined.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "20",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{She:2018:LHD,
author = "Xiaohan She and Ping Jian and Pengcheng Zhang and
Heyan Huang",
title = "Leveraging Hierarchical Deep Semantics to Classify
Implicit Discourse Relations via a Mutual Learning
Method",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "21:1--21:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178456",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "This article presents a mutual learning method using
hierarchical deep semantics for the classification of
implicit discourse relations in English. With the
absence of explicit discourse markers, traditional
discourse techniques mainly concentrate on discrete
linguistic features in this task, which always leads to
a data sparseness problem. To relieve this problem, we
propose a mutual learning neural model that makes use
of multilevel semantic information together, including
the distribution of implicit discourse relations, the
semantics of arguments, and the co-occurrence of
phrases and words. During the training process, the
predicting targets of the model, which are the
probability of the discourse relation type and the
distributed representation of semantic components, are
learned jointly and optimized mutually. The
experimental results show that this method outperforms
the previous works, especially in multiclass
identification attributed to the hierarchical semantic
representations and the mutual learning strategy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "21",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mohamed:2018:MSP,
author = "Emad Mohamed",
title = "Morphological Segmentation and Part-of-Speech Tagging
for the {Arabic} Heritage",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "22:1--22:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178459",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "We annotate 60,000 words of Classical Arabic (CA) with
topics in philosophy, religion, literature, and law
with fine-grain segment-based morphological
descriptions. We use these annotations for building a
morphological segmenter and part-of-speech (POS) tagger
for CA. With character-level classification and
features from the word and its lexical context, the
segmenter achieves a word accuracy of 96.8\% with the
main issue being a high rate of out-of-vocabulary
words. A token-based POS tagger achieves an accuracy of
96.22\% with 97.72\% on known tokens despite the small
size of the corpus. An error analysis shows that most
of the tagging errors are results of segmentation and
that quality improves with more data being added. The
morphological segmenter and tagger have a wide range of
potential applications in processing CA, a low-resource
variety of the language.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "22",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Huang:2018:IPK,
author = "Degen Huang and Jiahuan Pei and Cong Zhang and Kaiyu
Huang and Jianjun Ma",
title = "Incorporating Prior Knowledge into Word Embedding for
{Chinese} Word Similarity Measurement",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "23:1--23:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3182622",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Word embedding-based methods have received increasing
attention for their flexibility and effectiveness in
many natural language-processing (NLP) tasks, including
Word Similarity (WS). However, these approaches rely on
high-quality corpus and neglect prior knowledge.
Lexicon-based methods concentrate on human's
intelligence contained in semantic resources, e.g.,
Tongyici Cilin, HowNet, and Chinese WordNet, but they
have the drawback of being unable to deal with unknown
words. This article proposes a three-stage framework
for measuring the Chinese word similarity by
incorporating prior knowledge obtained from lexicons
and statistics into word embedding: in the first stage,
we utilize retrieval techniques to crawl the contexts
of word pairs from web resources to extend context
corpus. In the next stage, we investigate three types
of single similarity measurements, including lexicon
similarities, statistical similarities, and
embedding-based similarities. Finally, we exploit
simple combination strategies with math operations and
the counter-fitting combination strategy using
optimization method. To demonstrate our system's
efficiency, comparable experiments are conducted on the
PKU-500 dataset. Our final results are 0.561/0.516 of
Spearman/Pearson rank correlation coefficient, which
outperform the state-of-the-art performance to the best
of our knowledge. Experiment results on Chinese MC-30
and SemEval-2012 datasets show that our system also
performs well on other Chinese datasets, which proves
its transferability. Besides, our system is not
language-specific and can be applied to other
languages, e.g., English.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "23",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ehsani:2018:CWT,
author = "Razieh Ehsani and Ercan Solak and Olcay Taner Yildiz",
title = "Constructing a {WordNet} for {Turkish} Using Manual
and Automatic Annotation",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "24:1--24:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3185664",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "In this article, we summarize the methodology and the
results of our 2-year-long efforts to construct a
comprehensive WordNet for Turkish. In our approach, we
mine a dictionary for synonym candidate pairs and
manually mark the senses in which the candidates are
synonymous. We marked every pair twice by different
human annotators. We derive the synsets by finding the
connected components of the graph whose edges are
synonym senses. We also mined Turkish Wikipedia for
hypernym relations among the senses. We analyzed the
resulting WordNet to highlight the difficulties brought
about by the dictionary construction methods of
lexicographers. After splitting the unusually large
synsets, we used random walk-based clustering that
resulted in a Zipfian distribution of synset sizes. We
compared our results to BalkaNet and automatic
thesaurus construction methods using variation of
information metric. Our Turkish WordNet is available
online.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "24",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Huang:2018:LRR,
author = "Jizhou Huang and Shiqiang Ding and Haifeng Wang and
Ting Liu",
title = "Learning to Recommend Related Entities with
Serendipity for {Web} Search Users",
journal = j-TALLIP,
volume = "17",
number = "3",
pages = "25:1--25:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3185663",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Entity recommendation, providing entity suggestions to
assist users in discovering interesting information,
has become an indispensable feature of today's Web
search engine. However, the majority of existing entity
recommendation methods are not designed to boost the
performance in terms of serendipity, which also plays
an important role in the appreciation of users for a
recommendation system. To keep users engaged, it is
important to take into account serendipity when
building an entity recommendation system. In this
article, we propose a learning to recommend framework
that consists of two components: related entity finding
and candidate entity ranking. To boost serendipity
performance, three different sets of features that
correlate with the three aspects of serendipity are
employed in the proposed framework. Extensive
experiments are conducted on large-scale, real-world
datasets collected from a widely used commercial Web
search engine. The experiments show that our method
significantly outperforms several strong baseline
methods. An analysis on the impact of features reveals
that the set of interestingness features is the most
powerful feature set, and the set of unexpectedness
features can significantly contribute to recommendation
effectiveness. In addition, online controlled
experiments conducted on a commercial Web search engine
demonstrate that our method can significantly improve
user engagement against multiple baseline methods. This
further confirms the effectiveness of the proposed
framework.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "25",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Basiri:2018:WII,
author = "Mohammad Ehsan Basiri and Arman Kabiri",
title = "Words Are Important: Improving Sentiment Analysis in
the {Persian} Language by Lexicon Refining",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "26:1--26:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3195633",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Lexicon-based sentiment analysis (SA) aims to address
the problem of extracting people's opinions from their
comments on the Web using a predefined lexicon of
opinionated words. In contrast to the machine learning
(ML) approach, lexicon-based methods are
domain-independent methods that do not need a large
annotated training corpus and hence are faster. This
makes the lexicon-based approach prevalent in the SA
community. However, the story is different for the
Persian language. In contrast to English, using the
lexicon-based method in Persian is a new discipline.
There are rather limited resources available for SA in
Persian, making the accuracy of the existing
lexicon-based methods lower than other languages. In
the current study, first an exhaustive investigation of
the lexicon-based method is performed. Then two new
resources are introduced to address the problem of
resource scarcity for SA in Persian: a carefully
labeled lexicon of sentiment words, PerLex, and a new
handmade dataset of about 16,000 rated documents,
PerView. Moreover, a new hybrid method using both ML
and the lexicon-based approach is presented in which
PerLex words are used to train the ML algorithm.
Experiments are carried out on our new PerView dataset.
Results indicate that the accuracy of PerLex is higher
than the existing CNRC, Adjectives, SentiStrength,
PerSent, and LexiPers lexicons. In addition, the
results show that using PerLex significantly decreases
the execution time of the proposed system in comparison
to the above-mentioned lexicons. Moreover, the results
demonstrate the excellence of using opinionated lexicon
terms followed by bigrams as the features employed in
the ML method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "26",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Suryani:2018:RBS,
author = "Arie Ardiyanti Suryani and Dwi Hendratmo Widyantoro
and Ayu Purwarianti and Yayat Sudaryat",
title = "The Rule-Based {Sundanese} Stemmer",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "27:1--27:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3195634",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Our research proposed an iterative Sundanese stemmer
by removing the derivational affixes prior to the
inflexional. This scheme was chosen because, in the
Sundanese affixation, a confix (one of derivational
affix) is applied in the last phase of a morphological
process. Moreover, most of Sundanese affixes are
derivational, so removing the derivational affix as the
first step is reasonable. To handle ambiguity, the last
recognized affix was returned as the result. As the
baseline, a Confix-Stripping Approach that applies
Porter Stemmer for the Indonesian language was used.
This stemmer shares similarities in terms of affix
type, but uses a different stemming order. To observe
whether the baseline stems the Sundanese affixed word
properly, some features that were not covered by the
baseline, such as the infix and allomorph removal, were
added. The evaluation was done using 4,453 unique
affixed words collected from Sundanese online
magazines. The experiment shows that, as a whole, our
stemmer outperforms the modified baseline in terms of
recognized affixed type accuracy and properly stemmed
affixed words. Our stemmer recognized 68.87\% of the
Sundanese affixed types and produced 96.79\% of the
correctly affixed words; the modified baseline resulted
in 21.70\% and 71.59\%, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "27",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{He:2018:DPS,
author = "Ruifang He and Yaru Wang and Dawei Song and Peng Zhang
and Yuan Jia and Aijun Li",
title = "A Dependency Parser for Spontaneous {Chinese} Spoken
Language",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "28:1--28:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3196278",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Dependency analysis is vital for spoken language
understanding in spoken dialogue systems. However,
existing research has mainly focused on western spoken
languages, Japanese, and so on. Little research has
been done for spoken Chinese in terms of dependency
parsing. Therefore, the new spoken corpus, D-ESCSC
(Dependency-Expressive Speech Corpus of Standard
Chinese) is built by adding new dependency relations
special to spoken Chinese based on a written Chinese
annotation scheme. Since spoken Chinese contains
typical ill-grammatical phenomena, e.g., translocation,
repetition, duplication, and omission, the new atom
feature related to punctuation and three feature
templates are proposed to improve a graph-based
dependency parser. Experimental results on spoken
Chinese corpus show that the atom feature and three
templates really work and the new parser outperforms
the baseline parser. To our best knowledge, it is the
first work to report dependency parsing results of
spoken Chinese.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "28",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bai:2018:IVS,
author = "Xuefeng Bai and Hailong Cao and Tiejun Zhao",
title = "Improving Vector Space Word Representations via Kernel
Canonical Correlation Analysis",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "29:1--29:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3197566",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Cross-lingual word embeddings are representations for
vocabularies of two or more languages in one common
continuous vector space and are widely used in various
natural language processing tasks. A state-of-the-art
way to generate cross-lingual word embeddings is to
learn a linear mapping, with an assumption that the
vector representations of similar words in different
languages are related by a linear relationship.
However, this assumption does not always hold true,
especially for substantially different languages. We
therefore propose to use kernel canonical correlation
analysis to capture a non-linear relationship between
word embeddings of two languages. By extensively
evaluating the learned word embeddings on three tasks
(word similarity, cross-lingual dictionary induction,
and cross-lingual document classification) across five
language pairs, we demonstrate that our proposed
approach achieves essentially better performances than
previous linear methods on all of the three tasks,
especially for language pairs with substantial
typological difference.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "29",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Park:2018:NCI,
author = "Taekeun Park and Seung-Hoon Kim",
title = "Novel Character Identification Utilizing Semantic
Relation with Animate Nouns in {Korean}",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "30:1--30:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3197657",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "For identifying speakers of quoted speech or
extracting social networks from literature, it is
indispensable to extract character names and nominals.
However, detecting proper nouns in the novels
translated into or written in Korean is harder than in
English because Korean does not have a capitalization
feature. In addition, it is almost impossible for any
proper noun dictionary to include all kinds of
character names that have been created or will be
created by authors. Fortunately, a previous study shows
that utilizing postpositions for animate nouns is a
simple and effective tool for character identification
in Korean novels without a proper noun dictionary and a
training corpus. In this article, we propose a
character identification method utilizing the semantic
relation with known animate nouns. For 80 novels in
Korean, the proposed method increases the micro- and
macro-average recall by 13.68\% and 11.86\%,
respectively, while decreasing the micro-average
precision by 0.28\% and increasing the macro-average
precision by 0.07\% compared to the previous study. If
we focus on characters that are responsible for more
than 1\% of the character name mentions in each novel,
the micro- and macro-average F-measure of the proposed
method are 96.98\% and 97.32\%, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "30",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2018:GBB,
author = "Rui Wang and Hai Zhao and Sabine Ploux and Bao-Liang
Lu and Masao Utiyama and Eiichiro Sumita",
title = "Graph-Based Bilingual Word Embedding for Statistical
Machine Translation",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "31:1--31:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3203078",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Bilingual word embedding has been shown to be helpful
for Statistical Machine Translation (SMT). However,
most existing methods suffer from two obvious
drawbacks. First, they only focus on simple contexts
such as an entire document or a fixed-sized sliding
window to build word embedding and ignore latent useful
information from the selected context. Second, the word
sense but not the word should be the minimal semantic
unit; however, most existing methods still use word
representation. To overcome these drawbacks, this
article presents a novel Graph-Based Bilingual Word
Embedding (GBWE) method that projects bilingual word
senses into a multidimensional semantic space. First, a
bilingual word co-occurrence graph is constructed using
the co-occurrence and pointwise mutual information
between the words. Then, maximum complete subgraphs
(cliques), which play the role of a minimal unit for
bilingual sense representation, are dynamically
extracted according to the contextual information.
Consequently, correspondence analysis, principal
component analyses, and neural networks are used to
summarize the clique-word matrix into lower dimensions
to build the embedding model. Without contextual
information, the proposed GBWE can be applied to
lexical translation. In addition, given contextual
information, GBWE is able to give a dynamic solution
for bilingual word representations, which can be
applied to phrase translation and generation. Empirical
results show that GBWE can enhance the performance of
lexical translation, as well as
Chinese/French-to-English and Chinese-to-Japanese
phrase-based SMT tasks (IWSLT, NTCIR, NIST, and WAT).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "31",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Hamdi:2018:CCS,
author = "Ali Hamdi and Khaled Shaban and Anazida Zainal",
title = "{CLASENTI}: a Class-Specific Sentiment Analysis
Framework",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "32:1--32:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3209885",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Arabic text sentiment analysis suffers from low
accuracy due to Arabic-specific challenges (e.g.,
limited resources, morphological complexity, and
dialects) and general linguistic issues (e.g.,
fuzziness, implicit sentiment, sarcasm, and spam). The
limited resources problem requires efforts to build new
and improved Arabic corpora and lexica. We propose a
class-specific sentiment analysis (CLASENTI) framework.
The framework includes a new annotation approach to
build multi-faceted Arabic corpus and lexicon allowing
for simultaneous annotation of different facets,
including domains, dialects, linguistic issues, and
polarity strengths. Each of these facets has multiple
classes (e.g., the nine classes representing dialects
found in the Arab world). The new corpus and lexicon
annotations facilitate the development of new
class-specific classification models and polarity
strength calculation. For the new sentiment
classification models, we propose a hybrid model
combining corpus-based and lexicon-based models. The
corpus-based model has two interrelated phases to
build; (1) full-corpus classification models for all
facets; and (2) class-specific models trained on
filtered subsets of the corpus according to the
performances of the full-corpus models. To calculate
polarity strengths, the lexicon-based model filters the
annotated lexicon based on the specific classes of the
domain and dialect. As a case study, we collect and
annotate 15274 reviews from various sources, including
surveys, Facebook comments, and Twitter posts,
pertaining to governmental services. In addition, we
develop a new web-based application to apply the
proposed framework on the case study. CLASENTI
framework reaches up to 95\% accuracy and 93\% F1-Score
surpassing the best-known sentiment classifiers
implemented in Scikit-learn library that achieve 82\%
accuracy and 81\% F1-Score for Arabic when tested on
the same dataset.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "32",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2018:DSN,
author = "Limin Wang and Shoushan Li and Qian Yan and Guodong
Zhou",
title = "Domain-Specific Named Entity Recognition with
Document-Level Optimization",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "33:1--33:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3213544",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Previous studies normally formulate named entity
recognition (NER) as a sequence labeling task and
optimize the solution in the sentence level. In this
article, we propose a document-level optimization
approach to NER and apply it in a domain-specific
document-level NER task. As a baseline, we apply a
state-of-the-art approach, i.e., long-short-term memory
(LSTM), to perform word classification. On this basis,
we define a global objective function with the obtained
word classification results and achieve global
optimization via Integer Linear Programming (ILP).
Specifically, in the ILP-based approach, we propose
four kinds of constraints, i.e., label transition,
entity length, label consistency, and domain-specific
regulation constraints, to incorporate various entity
recognition knowledge in the document level. Empirical
studies demonstrate the effectiveness of the proposed
approach to domain-specific document-level NER.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "33",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Komiya:2018:CMA,
  author = {Kanako Komiya and Masaya Suzuki and Tomoya Iwakura and
    Minoru Sasaki and Hiroyuki Shinnou},
  title = {Comparison of Methods to Annotate Named Entity
    Corpora},
  journal = j-TALLIP,
  volume = {17},
  number = {4},
  pages = {34:1--34:??},
  month = aug,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3218820},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {The authors compared two methods for annotating a
    corpus for the named entity (NE) recognition task using
    non-expert annotators: (i) revising the results of an
    existing NE recognizer and (ii) manually annotating the
    NEs completely. The annotation time, degree of
    agreement, and performance were evaluated based on the
    gold standard. Because there were two annotators for
    one text for each method, two performances were
    evaluated: the average performance of both annotators
    and the performance when at least one annotator is
    correct. The experiments reveal that semi-automatic
    annotation is faster, achieves better agreement, and
    performs better on average. However, they also indicate
    that sometimes, fully manual annotation should be used
    for some texts whose document types are substantially
    different from the training data document types. In
    addition, the machine learning experiments using
    semi-automatic and fully manually annotated corpora as
    training data indicate that the F-measures could be
    better for some texts when manual instead of
    semi-automatic annotation was used. Finally,
    experiments using the annotated corpora for training as
    additional corpora show that (i) the NE recognition
    performance does not always correspond to the
    performance of the NE tag annotation and (ii) the
    system trained with the manually annotated corpus
    outperforms the system trained with the
    semi-automatically annotated corpus with respect to
    newswires, even though the existing NE recognizer was
    mainly trained with newswires.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {34},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Zhou:2018:WSP,
author = "Deyu Zhou and Zhikai Zhang and Min-Ling Zhang and
Yulan He",
title = "Weakly Supervised {POS} Tagging without
Disambiguation",
journal = j-TALLIP,
volume = "17",
number = "4",
pages = "35:1--35:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3214707",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:31 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
abstract = "Weakly supervised part-of-speech (POS) tagging is to
learn to predict the POS tag for a given word in
context by making use of partial annotated data instead
of the fully tagged corpora. Weakly supervised POS
tagging would benefit various natural language
processing applications in such languages where tagged
corpora are mostly unavailable. In this article, we
propose a novel framework for weakly supervised POS
tagging based on a dictionary of words with their
possible POS tags. In the constrained error-correcting
output codes (ECOC)-based approach, a unique $L$-bit
vector is assigned to each POS tag. The set of
bitvectors is referred to as a coding matrix with value
$\{1, -1\}$. Each column of the coding matrix specifies a
dichotomy over the tag space to learn a binary
classifier. For each binary classifier, its training
data is generated in the following way: each pair of
words and its possible POS tags are considered as a
positive training example only if the whole set of its
possible tags falls into the positive dichotomy
specified by the column coding and similarly for
negative training examples. Given a word in context,
its POS tag is predicted by concatenating the
predictive outputs of the $L$ binary classifiers and
choosing the tag with the closest distance according to
some measure. By incorporating the ECOC strategy, the
set of all possible tags for each word is treated as an
entirety without the need of performing disambiguation.
Moreover, instead of manual feature engineering
employed in most previous POS tagging approaches,
features for training and testing in the proposed
framework are automatically generated using neural
language modeling. The proposed framework has been
evaluated on three corpora for English, Italian, and
Malagasy POS tagging, achieving accuracies of 93.21\%,
90.9\%, and 84.5\% individually, which shows a
significant improvement compared to the
state-of-the-art approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "35",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bhattacharya:2019:UCW,
  author = {Paheli Bhattacharya and Pawan Goyal and Sudeshna
    Sarkar},
  title = {Using Communities of Words Derived from Multilingual
    Word Vectors for Cross-Language Information Retrieval
    in {Indian} Languages},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3208358},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3208358},
  abstract = {We investigate the use of word embeddings for query
    translation to improve precision in cross-language
    information retrieval (CLIR). Word vectors represent
    words in a distributional space such that syntactically
    or semantically similar words are close to each other
    in this space. Multilingual word embeddings are
    constructed in such a way that similar words across
    languages have similar vector representations. We
    explore the effective use of bilingual and multilingual
    word embeddings learned from comparable corpora of
    Indic languages to the task of CLIR. We propose a
    clustering method based on the multilingual word
    vectors to group similar words across languages. For
    this we construct a graph with words from multiple
    languages as nodes and with edges connecting words with
    similar vectors. We use the Louvain method for
    community detection to find communities in this graph.
    We show that choosing target language words as query
    translations from the clusters or communities
    containing the query terms helps in improving CLIR. We
    also find that better-quality query translations are
    obtained when words from more languages are used to do
    the clustering even when the additional languages are
    neither the source nor the target languages. This is
    probably because having more similar words across
    multiple languages helps define well-defined dense
    subclusters that help us obtain precise query
    translations. In this article, we demonstrate the use
    of multilingual word embeddings and word clusters for
    CLIR involving Indic languages. We also make available
    a tool for obtaining related words and the
    visualizations of the multilingual word vectors for
    English, Hindi, Bengali, Marathi, Gujarati, and
    Tamil.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {1},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Li:2019:OAE,
  author = {Maoxi Li and Mingwen Wang},
  title = {Optimizing Automatic Evaluation of Machine Translation
    with the {ListMLE} Approach},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {2:1--2:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3226045},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3226045},
  abstract = {Automatic evaluation of machine translation is
    critical for the evaluation and development of machine
    translation systems. In this study, we propose a new
    model for automatic evaluation of machine translation.
    The proposed model combines standard n-gram precision
    features and sentence semantic mapping features with
    neural features, including neural language model
    probabilities and the embedding distances between
    translation outputs and their reference translations.
    We optimize the model with a representative list-wise
    learning to rank approach, ListMLE, in terms of human
    ranking assessments. The experimental results on
    WMT'2015 Metrics task indicated that the proposed
    approach yields significantly better correlations with
    human assessments than several state-of-the-art
    baseline approaches. In particular, the results
    confirmed that the proposed list-wise learning to rank
    approach is useful and powerful for optimizing
    automatic evaluation metrics in terms of human ranking
    assessments. Deep analysis also demonstrated that
    optimizing automatic metrics with the ListMLE approach
    is a reasonable method and adding the neural features
    can gain considerable improvements compared with the
    traditional features.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {2},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Su:2019:RSA,
  author = {Ming-Hsiang Su and Chung-Hsien Wu and Kun-Yi Huang and
    Wu-Hsuan Lin},
  title = {Response Selection and Automatic Message-Response
    Expansion in Retrieval-Based {QA} Systems using
    Semantic Dependency Pair Model},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3229184},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3229184},
  abstract = {This article presents an approach to response
    selection and message-response (MR) database expansion
    from the unstructured data on the psychological
    consultation websites for a retrieval-based question
    answering (QA) system in a constrained domain for
    emotional support and comforting. First, we manually
    construct an initial MR database based on the articles
    collected from the psychological consultation websites.
    The Chinese Knowledge and Information Processing
    probabilistic context-free grammar is adopted to obtain
    the semantic dependency graphs (SDGs) of all the
    messages and responses in the initial MR database. For
    each sentence in the MR database, all the semantic
    dependencies, each composed of two words and their
    semantic relation, are extracted from the SDG of the
    sentence to form a semantic dependency set. Finally, a
    matrix with the element representing the correlation
    between the semantic dependencies of the messages and
    their corresponding responses is constructed as a
    semantic dependency pair model (SDPM) for response
    selection. Moreover, as the number of MR pairs in the
    psychological consultation websites is increasing day
    by day, the MR database in the QA system should be
    expanded to meet the needs of the users. For MR
    database expansion, the unstructured data from the
    message board are automatically collected. For the
    collected data, the supervised latent Dirichlet
    allocation is adopted for event detection and then the
    event-based delta Bayesian Information Criterion is
    used for message and response article segmentation.
    Each extracted message segment is then fed to the
    constructed retrieval-based QA system to find the best
    matched response segment and the matching score is also
    estimated to verify if the new MR pair is suitable to
    be included in the expanded MR database. Fivefold cross
    validation was employed to evaluate the performance of
    the proposed retrieval-based QA system over the
    expanded MR database based on SDPM. Compared to the
    vector space model-based method, the Okapi BM25 model,
    and the deep learning-based sequence-to-sequence with
    attention model, the proposed approach achieved a more
    favorable performance according to a statistical
    significance test. The retrieval accuracy based on MR
    expansion was also evaluated and a satisfactory result
    was obtained confirming the effectiveness of the
    expanded MR database. In addition, the user's
    satisfaction score of the proposed system was evaluated
    using the Cronbach's alpha value and the satisfaction
    score of the proposed SDPM was higher than those of the
    methods for comparison.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {3},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Huang:2019:IMH,
  author = {Guoping Huang and Jiajun Zhang and Yu Zhou and
    Chengqing Zong},
  title = {Input Method for Human Translators: a Novel Approach
    to Integrate Machine Translation Effectively and
    Imperceptibly},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3230638},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3230638},
  abstract = {Computer-aided translation (CAT) systems are the most
    popular tool for helping human translators efficiently
    perform language translation. To further improve the
    translation efficiency, there is an increasing interest
    in applying machine translation (MT) technology to
    upgrade CAT. To thoroughly integrate MT into CAT
    systems, in this article, we propose a novel approach:
    a new input method that makes full use of the knowledge
    adopted by MT systems, such as translation rules,
    decoding hypotheses, and n-best translation lists. The
    proposed input method contains two parts: a phrase
    generation model, allowing human translators to type
    target sentences quickly, and an n-gram prediction
    model, helping users choose perfect MT fragments
    smoothly. In addition, to tune the underlying MT system
    to generate the input method preferable results, we
    design a new evaluation metric for the MT system. The
    proposed input method integrates MT effectively and
    imperceptibly, and it is particularly suitable for many
    target languages with complex characters, such as
    Chinese and Japanese. The extensive experiments
    demonstrate that our method saves more than 23\% in
    time and over 42\% in keystrokes, and it also improves
    the translation quality by more than 5 absolute BLEU
    scores compared with the strong baseline, i.e.,
    post-editing using Google Pinyin.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {4},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Altakrori:2019:AAA,
author = "Malik H. Altakrori and Farkhund Iqbal and Benjamin C.
M. Fung and Steven H. H. Ding and Abdallah Tubaishat",
title = "{Arabic} Authorship Attribution: an Extensive Study on
{Twitter} Posts",
journal = j-TALLIP,
volume = "18",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3236391",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3236391",
abstract = "Law enforcement faces problems in tracing the true
identity of offenders in cybercrime investigations.
Most offenders mask their true identity, impersonate
people of high authority, or use identity deception and
obfuscation tactics to avoid detection and
traceability. To address the problem of anonymity,
authorship analysis is used to identify individuals by
their writing styles without knowing their actual
identities. Most authorship studies are dedicated to
English due to its widespread use over the Internet,
but recent cyber-attacks such as the distribution of
Stuxnet indicate that Internet crimes are not limited
to a certain community, language, culture, ideology, or
ethnicity. To effectively investigate cybercrime and to
address the problem of anonymity in online
communication, there is a pressing need to study
authorship analysis of languages such as Arabic,
Chinese, Turkish, and so on. Arabic, the focus of this
study, is the fourth most widely used language on the
Internet. This study investigates authorship of Arabic
discourse/text, especially tiny text, Twitter posts. We
benchmark the performance of a profile-based approach
that uses $n$-grams as features and compare it with
state-of-the-art instance-based classification
techniques. Then we adapt an event-visualization tool
that is developed for English to accommodate both
Arabic and English languages and visualize the result
of the attribution evidence. In addition, we
investigate the relative effect of the training set,
the length of tweets, and the number of authors on
authorship classification accuracy. Finally, we show
that diacritics have an insignificant effect on the
attribution process and part-of-speech tags are less
effective than character-level and word-level
$n$-grams.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "5",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhang:2019:WSB,
  author = {Shaoning Zhang and Cunli Mao and Zhengtao Yu and
    Hongbin Wang and Zhongwei Li and Jiafu Zhang},
  title = {Word Segmentation for {Burmese} Based on Dual-Layer
    {CRFs}},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {6:1--6:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3232537},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3232537},
  abstract = {Burmese is an isolated language, in which the syllable
    is the smallest unit. Syllable segmentation methods
    based on matching lead to performance subject to the
    syllable segmentation effect. This article proposes a
    word segmentation method with fusion conditions of
    double syllable features. It combines word segmentation
    and segmentation of syllables into one process, thus
    reducing the impact of errors on the syllable
    segmentation of Burmese. In the first layer of the
    conditional random fields (CRF) model, Burmese
    characters as atomic features are integrated into the
    Burma section of the Barkis Speech Paradigm (Backus
    normal form) features to realize the Burma syllable
    sequence tags. In the second layer of the CRFs model,
    with the syllable marked as input, it realizes the
    sequence markers through building a feature template
    with syllables as atomic features. The experimental
    results show that the proposed method has a better
    effect compared with the method based on the matching
    of syllables.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {6},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Li:2019:IML,
  author = {Junjie Li and Haoran Li and Xiaomian Kang and Haitong
    Yang and Chengqing Zong},
  title = {Incorporating Multi-Level User Preference into
    Document-Level Sentiment Classification},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {7:1--7:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3234512},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3234512},
  abstract = {Document-level sentiment classification aims to
    predict a user's sentiment polarity in a document about
    a product. Most existing methods only focus on review
    contents and ignore users who post reviews. In fact,
    when reviewing a product, different users have
    different word-using habits to express opinions (i.e.,
    word-level user preference), care about different
    attributes of the product (i.e., aspect-level user
    preference), and have different characteristics to
    score the review (i.e., polarity-level user
    preference). These preferences have great influence on
    interpreting the sentiment of text. To address this
    issue, we propose a model called Hierarchical User
    Attention Network (HUAN), which incorporates
    multi-level user preference into a hierarchical neural
    network to perform document-level sentiment
    classification. Specifically, HUAN encodes different
    kinds of information (word, sentence, aspect, and
    document) in a hierarchical structure and imports user
    embedding and user attention mechanism to model these
    preferences. Empirical results on two real-world
    datasets show that HUAN achieves state-of-the-art
    performance. Furthermore, HUAN can also mine important
    attributes of products for different users.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {7},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Jain:2019:UES,
  author = {Amita Jain and Minni Jain and Goonjan Jain and
    Devendra K. Tayal},
  title = {{``UTTAM''}: an Efficient Spelling Correction System
    for {Hindi} Language Based on Supervised Learning},
  journal = j-TALLIP,
  volume = {18},
  number = {1},
  pages = {8:1--8:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3264620},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3264620},
  abstract = {In this article, we propose a system called ``UTTAM,''
    for correcting spelling errors in Hindi language text
    using supervised learning. Unlike other languages,
    Hindi contains a large set of characters, words with
    inflections and complex characters, phonetically
    similar sets of characters, and so on. The complexity
    increases the possibility of confusion and occasionally
    leads to entering a wrong character in a word. The
    existence of spelling errors in text significantly
    decreases the accuracy of the available resources, like
    search engine, text editor, and so on. The proposed
    work is the first approach to correct non-word (Out of
    Vocabulary) errors as well as real-word errors
    simultaneously in a sentence of Hindi language. The
    proposed method investigates the human behavior, i.e.,
    the type and frequency of spelling errors done by
    humans in Hindi text. Based on the type and frequency
    of spelling errors, the heterogeneous data is collected
    in matrices. This data in matrices is used to generate
    the suitable candidate words for an input word. After
    generating candidate words, the Viterbi algorithm is
    applied to perform the word correction. The Viterbi
    algorithm finds the best sequence of candidate words to
    correct the input sentence. For Hindi, this work is the
    first attempt for real-word error correction. For
    non-word errors, the experiments show that ``UTTAM''
    performs better than the existing systems SpellGuru and
    Saksham.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {8},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Murthy:2019:INT,
  author = {Rudra Murthy and Mitesh M. Khapra and Pushpak
    Bhattacharyya},
  title = {Improving {NER} Tagging Performance in Low-Resource
    Languages via Multilingual Learning},
  journal = j-TALLIP,
  volume = {18},
  number = {2},
  pages = {9:1--9:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3238797},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3238797},
  abstract = {Existing supervised solutions for Named Entity
    Recognition (NER) typically rely on a large annotated
    corpus. Collecting large amounts of NER annotated
    corpus is time-consuming and requires considerable
    human effort. However, collecting small amounts of
    annotated corpus for any language is feasible, but the
    performance degrades due to data sparsity. We address
    the data sparsity by borrowing features from the data
    of a closely related language. We use hierarchical
    neural networks to train a supervised NER system. The
    feature borrowing from a closely related language
    happens via the shared layers of the network. The
    neural network is trained on the combined dataset of
    the low-resource language and a closely related
    language, also termed Multilingual Learning. Unlike
    existing systems, we share all layers of the network
    between the two languages. We apply multilingual
    learning for NER in Indian languages and empirically
    show the benefits over a monolingual deep learning
    system and a traditional machine-learning system with
    some feature engineering. Using multilingual learning,
    we show that the low-resource language NER performance
    increases mainly due to (1) increased named entity
    vocabulary, (2) cross-lingual subword features, and (3)
    multilingual learning playing the role of
    regularization.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {9},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Jarrar:2019:DBM,
  author = {Mustafa Jarrar and Fadi Zaraket and Rami Asia and
    Hamzeh Amayreh},
  title = {Diacritic-Based Matching of {Arabic} Words},
  journal = j-TALLIP,
  volume = {18},
  number = {2},
  pages = {10:1--10:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3242177},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3242177},
  abstract = {Words in Arabic consist of letters and short vowel
    symbols called diacritics inscribed atop regular
    letters. Changing diacritics may change the syntax and
    semantics of a word; turning it into another. This
    results in difficulties when comparing words based
    solely on string matching. Typically, Arabic NLP
    applications resort to morphological analysis to battle
    ambiguity originating from this and other challenges.
    In this article, we introduce three alternative
    algorithms to compare two words with possibly different
    diacritics. We propose the Subsume knowledge-based
    algorithm, the Imply rule-based algorithm, and the
    Alike machine-learning-based algorithm. We evaluated
    the soundness, completeness, and accuracy of the
    algorithms against a large dataset of 86,886 word
    pairs. Our evaluation shows that the accuracy of
    Subsume (100\%), Imply (99.32\%), and Alike (99.53\%).
    Although accurate, Subsume was able to judge only 75\%
    of the data. Both Subsume and Imply are sound, while
    Alike is not. We demonstrate the utility of the
    algorithms using a real-life use case --- in lemma
    disambiguation and in linking hundreds of Arabic
    dictionaries.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {10},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-URL = {https://dl.acm.org/loi/tallip},
}
@Article{Bhattacharya:2019:SSW,
author = "Nilanjana Bhattacharya and Partha Pratim Roy and
Umapada Pal",
title = "Sub-Stroke-Wise Relative Feature for Online {Indic}
Handwriting Recognition",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "11:1--11:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3264735",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3264735",
abstract = "The main problem of Bangla (Bengali) and Devanagari
handwriting recognition is the shape similarity of
characters. There are only a few pieces of work on
writer-independent cursive online Indian text
recognition, and the shape similarity problem needs
more attention from the researchers. To handle the
shape similarity problem of cursive characters of
Bangla and Devanagari scripts, in this article, we
propose a new category of features called
`sub-stroke-wise relative feature' (SRF) which are
based on relative information of the constituent parts
of the handwritten strokes. Relative information among
some of the parts within a character can be a
distinctive feature as it scales up small
dissimilarities and enhances discrimination among
similar-looking shapes. Also, contextual anticipatory
phenomena are automatically modeled by this type of
feature, as it takes into account the influence of
previous and forthcoming strokes. We have tested
popular state-of-the-art feature sets as well as
proposed SRF using various (up to 20,000-word) lexicons
and noticed that SRF significantly outperforms the
state-of-the-art feature sets for online Bangla and
Devanagari cursive word recognition.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "11",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mrinalini:2019:PBP,
author = "K. Mrinalini and T. Nagarajan and P. Vijayalakshmi",
title = "Pause-Based Phrase Extraction and Effective {OOV}
Handling for Low-Resource Machine Translation Systems",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "12:1--12:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3265751",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3265751",
abstract = "Machine translation is the core problem for several
natural language processing research across the globe.
However, building a translation system involving
low-resource languages remains a challenge with respect
to statistical machine translation (SMT). This work
proposes and studies the effect of a phrase-induced
hybrid machine translation system for translation from
English to Tamil, under a low-resource setting. Unlike
conventional hybrid MT systems, the free-word ordering
feature of the target language Tamil is exploited to
form a re-ordered target language model and to extend
the parallel text corpus for training the SMT. In the
current work, a novel rule-based phrase-extraction
method, implemented using parts-of-speech (POS) and
place-of-pause in both languages is proposed, which is
used to pre-process the training corpus for developing
the back-off phrase-induced SMT. Further,
out-of-vocabulary (OOV) words are handled using
speech-based transliteration and two-level thesaurus
intersection techniques based on the POS tag of the OOV
word. To ensure that the input with OOV words does not
skip phrase-level translation in the hierarchical
model, a phrase-level example-based machine translation
approach is adopted to find the closest matching phrase
and perform translation followed by OOV replacement.
The proposed system results in a bilingual evaluation
understudy score of 84.78 and a translation edit rate
of 19.12. The performance of the system is compared in
terms of adequacy and fluency, with existing
translation systems for this specific language pair,
and it is observed that the proposed system outperforms
its counterparts.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "12",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Le:2019:LRM,
author = "Ngoc Tan Le and Fatiha Sadat and Lucie Menard and Dien
Dinh",
title = "Low-Resource Machine Transliteration Using Recurrent
Neural Networks",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "13:1--13:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3265752",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3265752",
abstract = "Grapheme-to-phoneme models are key components in
automatic speech recognition and text-to-speech
systems. With low-resource language pairs that do not
have available and well-developed pronunciation
lexicons, grapheme-to-phoneme models are particularly
useful. These models are based on initial alignments
between grapheme source and phoneme target sequences.
Inspired by sequence-to-sequence recurrent neural
network--based translation methods, the current
research presents an approach that applies an alignment
representation for input sequences and pretrained
source and target embeddings to overcome the
transliteration problem for a low-resource language
pair. Evaluation and experiments involving French and
Vietnamese showed that with only a small bilingual
pronunciation dictionary available for training the
transliteration models, promising results were obtained
with a large increase in BLEU scores and a reduction in
Translation Error Rate (TER) and Phoneme Error Rate
(PER). Moreover, we compared our proposed neural
network--based transliteration approach with a
statistical one.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "13",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Na:2019:TBK,
author = "Seung-hoon Na and Jianri Li and Jong-hoon Shin and
Kangil Kim",
title = "Transition-Based {Korean} Dependency Parsing Using
Hybrid Word Representations of Syllables and Morphemes
with {LSTMs}",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "14:1--14:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241745",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3241745",
abstract = "Recently, neural approaches for transition-based
dependency parsing have become one of the state-of-the-art
methods for performing dependency parsing tasks in
many languages. In neural transition-based parsing, a
parser state representation is first computed from the
configuration of a stack and a buffer, which is then
fed into a feed-forward neural network model that
predicts the next transition action. Given that words
are basic elements of a stack and buffer, a parser
state representation is considerably affected by how a
word representation is defined. In particular, word
representation issues become more critical in
morphologically rich languages such as Korean, as the
set of potential words is not bounded but introduces the
second-order vocabulary complexity, called the phrase
vocabulary complexity due to the agglutinative
characteristics of the language. In this article, we
propose a hybrid word representation that combines two
compositional word representations, each of which is
derived from representations of syllables and
morphemes, respectively. Our underlying assumption for
this hybrid word representation is that, because both
syllables and morphemes are two common ways of
decomposing Korean words, it is expected that their
effects in inducing word representation are
complementary to one another. Experimental results
carried on Sejong and SPMRL 2014 datasets show that our
proposed hybrid word representation leads to the
state-of-the-art performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "14",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Akhtar:2019:IWE,
author = "Md Shad Akhtar and Palaash Sawant and Sukanta Sen and
Asif Ekbal and Pushpak Bhattacharyya",
title = "Improving Word Embedding Coverage in Less-Resourced
Languages Through Multi-Linguality and
Cross-Linguality: a Case Study with Aspect-Based
Sentiment Analysis",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "15:1--15:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3273931",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3273931",
abstract = "In the era of deep learning-based systems, efficient
input representation is one of the primary requisites
in solving various problems related to Natural Language
Processing (NLP), data mining, text mining, and the
like. Absence of adequate representation for an input
introduces the problem of data sparsity, and it poses a
great challenge to solve the underlying problem. The
problem is more intensified with resource-poor
languages due to the absence of a sufficiently large
corpus required to train a word embedding model. In
this work, we propose an effective method to improve
the word embedding coverage in less-resourced languages
by leveraging bilingual word embeddings learned from
different corpora. We train and evaluate deep Long
Short Term Memory (LSTM)-based architecture and show
the effectiveness of the proposed approach for two
aspect-level sentiment analysis tasks (i.e., aspect
term extraction and sentiment classification). The
neural network architecture is further assisted by
hand-crafted features for prediction. We apply the
proposed model in two experimental setups:
multi-lingual and cross-lingual. Experimental results
show the effectiveness of the proposed approach against
the state-of-the-art methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "15",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Nakamura:2019:WBR,
author = "Tatsuya Nakamura and Masumi Shirakawa and Takahiro
Hara and Shojiro Nishio",
title = "{Wikipedia}-Based Relatedness Measurements for
Multilingual Short Text Clustering",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "16:1--16:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3276473",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3276473",
abstract = "Throughout the world, people can post information
about their local area in their own languages using
social networking services. Multilingual short text
clustering is an important task to organize such
information, and it can be applied to various
applications, such as event detection and
summarization. However, measuring the relatedness
between short texts written in various languages is a
challenging problem. In addition to handling multiple
languages, the semantic gaps among all languages must
be considered. In this article, we propose two
Wikipedia-based semantic relatedness measurement
methods for multilingual short text clustering. The
proposed methods solve the semantic gap problem by
incorporating the inter-language links of Wikipedia
into Extended Naive Bayes (ENB), a probabilistic method
that can be applied to measure semantic relatedness
among monolingual short texts. The proposed methods
represent a multilingual short text as a vector of the
English version of Wikipedia articles (entities). By
transferring texts to a unified vector space, the
relatedness between texts in different languages with
similar meanings can be increased. We also propose an
approach that can improve clustering performance and
reduce the processing time by eliminating
language-specific entities in the unified vector space.
Experimental results on multilingual Twitter message
clustering revealed that the proposed methods
outperformed cross-lingual explicit semantic analysis,
a previously proposed method to measure relatedness
between texts in different languages. Moreover, the
proposed methods were comparable to ENB applied to
texts translated into English using a proprietary
translation service. The proposed methods enabled
relatedness measurements for multilingual short text
clustering without requiring machine translation
processes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "16",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ding:2019:NFF,
author = "Chenchen Ding and Masao Utiyama and Eiichiro Sumita",
title = "{NOVA}: a Feasible and Flexible Annotation System for
Joint Tokenization and Part-of-Speech Tagging",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "17:1--17:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3276773",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3276773",
abstract = "A feasible and flexible annotation system is designed
for joint tokenization and part-of-speech (POS) tagging
to annotate those languages without natural definitions
of words. This design was motivated by the fact that
word separators are not used in many highly analytic
East and Southeast Asian languages. Although several of
the languages are well-studied, e.g., Chinese and
Japanese, many are understudied with low resources,
e.g., Burmese (Myanmar) and Khmer. In the first part of
the article, the proposed annotation system, named
nova, is introduced. nova contains only four basic tags
(n, v, a, and o); these tags can be further modified
and combined to adapt complex linguistic phenomena in
tokenization and POS tagging. In the second part of the
article, the feasibility and flexibility of nova is
illustrated from the annotation practice on Burmese and
Khmer. The relation between nova and two universal POS
tagsets is discussed in the final part of the
article.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ahmadi:2019:RBK,
author = "Sina Ahmadi",
title = "A Rule-Based {Kurdish} Text Transliteration System",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "18:1--18:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3278623",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3278623",
abstract = "In this article, we present a rule-based approach for
transliterating two of the most used orthographies in
Sorani Kurdish. Our work consists of detecting a
character in a word by removing the possible
ambiguities and mapping it into the target orthography.
We describe different challenges in Kurdish text mining
and propose novel ideas concerning the transliteration
task for Sorani Kurdish. Our transliteration system,
named Wergor, achieves 82.79\% overall precision and
more than 99\% in detecting the double-usage
characters. We also present a manually transliterated
corpus for Kurdish.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "18",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kamila:2019:THL,
author = "Sabyasachi Kamila and Mohammad Hasanuzzaman and Asif
Ekbal and Pushpak Bhattacharyya",
title = "{Tempo-HindiWordNet}: a Lexical Knowledge-base for
Temporal Information Processing",
journal = j-TALLIP,
volume = "18",
number = "2",
pages = "19:1--19:??",
month = feb,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3277504",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3277504",
abstract = "Temporality has significantly contributed to various
Natural Language Processing and Information Retrieval
applications. In this article, we first create a
lexical knowledge-base in Hindi by identifying the
temporal orientation of word senses based on their
definition and then use this resource to detect
underlying temporal orientation of the sentences. To
create the resource, we propose a semi-supervised
learning framework, where each synset of the Hindi
WordNet is classified into one of the five categories,
namely, past, present, future, neutral, and atemporal.
The algorithm initiates learning with a set of seed
synsets and then iterates following different expansion
strategies, viz. probabilistic expansion based on
classifier's confidence and semantic distance based
measures. We manifest the usefulness of the resource
that we build on an external task, viz. sentence-level
temporal classification. The underlying idea is that a
temporal knowledge-base can help in classifying the
sentences according to their inherent temporal
properties. Experiments on two different domains, viz.
general and Twitter, show interesting results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "19",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Alnawas:2019:SAI,
author = "Anwar Alnawas and Nursal Arici",
title = "Sentiment Analysis of {Iraqi Arabic} Dialect on
{Facebook} Based on Distributed Representations of
Documents",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "20:1--20:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3278605",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3278605",
abstract = "Nowadays, social media is used by many people to
express their opinions about a variety of topics.
Opinion Mining or Sentiment Analysis techniques extract
opinions from user generated contents. Over the years,
a multitude of Sentiment Analysis studies has been done
about the English language with deficiencies of
research in all other languages. Unfortunately, Arabic
is one of the languages that seems to lack substantial
research, despite the rapid growth of its use on social
media outlets. Furthermore, specific Arabic dialects
should be studied, not just Modern Standard Arabic. In
this paper, we experiment sentiments analysis of Iraqi
Arabic dialect using word embedding. First, we made a
large corpus from previous works to learn word
representations. Second, we generated word embedding
model by training corpus using Doc2Vec representations
based on Paragraph and Distributed Memory Model of
Paragraph Vectors (DM-PV) architecture. Lastly, the
represented feature used for training four binary
classifiers (Logistic Regression, Decision Tree,
Support Vector Machine and Naive Bayes) to detect
sentiment. We also experimented different values of
parameters (window size, dimension and negative
samples). In the light of the experiments, it can be
concluded that our approach achieves a better
performance for Logistic Regression and Support Vector
Machine than the other classifiers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "20",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Singh:2019:OHG,
author = "Sukhdeep Singh and Anuj Sharma",
title = "Online Handwritten {Gurmukhi} Words Recognition: an
Inclusive Study",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "21:1--21:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3282441",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3282441",
abstract = "Identification of offline and online handwritten words
is a challenging and complex task. In comparison to
Latin and Oriental scripts, the research and study of
handwriting recognition at word level in Indic scripts
is at its initial phases. The two main methods of
handwriting recognition are global and analytical. The
present work introduces a novel analytical approach for
online handwritten Gurmukhi word recognition based on a
minimal set of words and recognizes an input Gurmukhi
word as a sequence of characters. We employed a
sequential step-by-step approach to recognize online
handwritten Gurmukhi words. Considering the massive
variability in online Gurmukhi handwriting, the present
work employs the completely linked non-homogeneous
hidden Markov model. In the present study, we
considered the dependent, major-dependent, and
super-dependent nature of strokes to form Gurmukhi
characters in words. On test sets of online handwritten
Gurmukhi datasets, the word-level accuracy rates are
85.98\%, 84.80\%, 82.40\%, and 82.20\% in four
different modes. Besides the online Gurmukhi word
recognition, the present work also provides Gurmukhi
handwriting analysis study for varying writing styles
and proposes novel techniques for zone detection and
rearrangement of strokes. Our proposed algorithms have
been successfully employed to online handwritten
Gurmukhi word recognition in dependent and independent
modes of handwriting.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "21",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Yucesoy:2019:COW,
author = "Veysel Y{\"u}cesoy and Aykut Ko{\c{c}}",
title = "Co-occurrence Weight Selection in Generation of Word
Embeddings for Low Resource Languages",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "22:1--22:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3282443",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3282443",
abstract = "This study aims to increase the performance of word
embeddings by proposing a new weighting scheme for
co-occurrence counting. The idea behind this new family
of weights is to overcome the disadvantage of distant
appearing word pairs, which are indeed semantically
close, while representing them in the co-occurrence
counting. For high-resource languages, this
disadvantage might not be effective due to the high
frequency of co-occurrence. However, when there are not
enough available resources, such pairs suffer from
being distant. To favour such pairs, a weighting scheme
based on a polynomial fitting procedure is proposed to
shift the weights up for distant words while the
weights of nearby words are left almost unchanged. The
parameter optimization for new weights and the effects
of the weighting scheme are analysed for the English,
Italian, and Turkish languages. A small portion of
English resources and a quarter of Italian resources
are utilized for demonstration purposes, as if these
languages are low-resource languages. Performance
increase is observed in analogy tests when the proposed
weighting scheme is applied to relatively small corpora
(i.e., mimicking low-resource languages) of both
English and Italian. To show the effectiveness of the
proposed scheme in small corpora, it is also shown for
a large English corpus that the performance of the
proposed weighting scheme cannot outperform the
original weights. Since Turkish is relatively a
low-resource language, it is demonstrated that the
proposed weighting scheme can increase the performance
of both analogy and similarity tests when all Turkish
Wikipedia pages are utilized as a corpus. The positive
effect of the proposed scheme has also been
demonstrated in a standard sentiment analysis task for
the Turkish language.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "22",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bounhas:2019:UCA,
author = "Ibrahim Bounhas",
title = "On the Usage of a Classical {Arabic} Corpus as a
Language Resource: Related Research and Key
Challenges",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "23:1--23:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3277591",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3277591",
abstract = "This article presents a literature review of
computer-science-related research applied on hadith, a
kind of Arabic narration which appeared in the 7th
century. We study and compare existent works in several
fields of Natural Language Processing (NLP),
Information Retrieval (IR), and Knowledge Extraction
(KE). Thus, we elicit their main drawbacks and
identify some perspectives, which may be considered by
the research community. We also study the
characteristics of these types of documents, by
enumerating the advantages/limits of using hadith as a
language resource. Moreover, our study shows that
previous studies used different collections of hadiths,
thus making it hard to compare their results
objectively. Besides, many preprocessing steps are
recurrent through these applications, thus wasting a
lot of time. Consequently, the key issues for building
generic language resources from hadiths are discussed,
taking into account the relevance of related literature
and the wide community of researchers that are
interested in these narrations. The ultimate goal is to
structure hadith books for multiple usages, thus
building common collections which may be exploited in
future applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "23",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jung:2019:MPN,
author = "Sangkeun Jung and Cheon-Eum Park and Changki Lee",
title = "Multitask Pointer Network for {Korean} Dependency
Parsing",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "24:1--24:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3282442",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3282442",
abstract = "Dependency parsing is a fundamental problem in natural
language processing. We introduce a novel
dependency-parsing framework called
head-pointing--based dependency parsing. In this
framework, we cast the Korean dependency parsing
problem as a statistical head-pointing and arc-labeling
problem. To address this problem, a novel neural
network called the multitask pointer network is devised
for a neural sequential head-pointing and type-labeling
architecture. Our approach does not require any
handcrafted features or language-specific rules to
parse dependency. Furthermore, it achieves
state-of-the-art performance for Korean dependency
parsing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "24",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bolucu:2019:UJP,
author = "Necva B{\"o}l{\"u}c{\"u} and Burcu Can",
title = "Unsupervised Joint {PoS} Tagging and Stemming for
Agglutinative Languages",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "25:1--25:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3292398",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3292398",
abstract = "The number of possible word forms is theoretically
infinite in agglutinative languages. This brings up the
out-of-vocabulary (OOV) issue for part-of-speech (PoS)
tagging in agglutinative languages. Since inflectional
morphology does not change the PoS tag of a word, we
propose to learn stems along with PoS tags
simultaneously. Therefore, we aim to overcome the
sparsity problem by reducing word forms into their
stems. We adopt a Bayesian model that is fully
unsupervised. We build a Hidden Markov Model for PoS
tagging where the stems are emitted through hidden
states. Several versions of the model are introduced in
order to observe the effects of different dependencies
throughout the corpus, such as the dependency between
stems and PoS tags or between PoS tags and affixes.
Additionally, we use neural word embeddings to estimate
the semantic similarity between the word form and stem.
We use the semantic similarity as prior information to
discover the actual stem of a word since inflection
does not change the meaning of a word. We compare our
models with other unsupervised stemming and PoS tagging
models on Turkish, Hungarian, Finnish, Basque, and
English. The results show that a joint model for PoS
tagging and stemming improves on an independent PoS
tagger and stemmer in agglutinative languages.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "25",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kang:2019:SDR,
author = "Xiaomian Kang and Chengqing Zong and Nianwen Xue",
title = "A Survey of Discourse Representations for {Chinese}
Discourse Annotation",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "26:1--26:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3293442",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3293442",
abstract = "A key element in computational discourse analysis is
the design of a formal representation for the discourse
structure of a text. With machine learning being the
dominant method, it is important to identify a
discourse representation that can be used to perform
large-scale annotation. This survey provides a
systematic analysis of existing discourse
representation theories to evaluate whether they are
suitable for annotation of Chinese text. Specifically,
the two properties, expressiveness and practicality,
are introduced to compare the representations of
theories based on rhetorical relations and the
representations of theories based on entity relations.
The comparison systematically reveals linguistic and
computational characteristics of the theories. After
that, we conclude that none of the existing theories
are quite suitable for scalable Chinese discourse
annotation because they are not both expressive and
practical. Therefore, a new discourse representation
needs to be proposed, which should balance the
expressiveness and practicality, and cover rhetorical
relations and entity relations. Inspired by the
conclusions, this survey discusses some preliminary
proposals on how to represent the discourse structure
that are worth pursuing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "26",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Badaro:2019:SOM,
author = "Gilbert Badaro and Ramy Baly and Hazem Hajj and Wassim
El-Hajj and Khaled Bashir Shaban and Nizar Habash and
Ahmad Al-Sallab and Ali Hamdi",
title = "A Survey of Opinion Mining in {Arabic}: a
Comprehensive System Perspective Covering Challenges
and Advances in Tools, Resources, Models, Applications,
and Visualizations",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "27:1--27:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3295662",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3295662",
abstract = "Opinion-mining or sentiment analysis continues to gain
interest in industry and academics. While there has
been significant progress in developing models for
sentiment analysis, the field remains an active area of
research for many languages across the world, and in
particular for the Arabic language, which is the fifth
most-spoken language and has become the fourth
most-used language on the Internet. With the flurry of
research activity in Arabic opinion mining, several
researchers have provided surveys to capture advances
in the field. While these surveys capture a wealth of
important progress in the field, the fast pace of
advances in machine learning and natural language
processing (NLP) necessitates a continuous need for a
more up-to-date literature survey. The aim of this
article is to provide a comprehensive literature survey
for state-of-the-art advances in Arabic opinion mining.
The survey goes beyond surveying previous works that
were primarily focused on classification models.
Instead, this article provides a comprehensive system
perspective by covering advances in different aspects
of an opinion-mining system, including advances in NLP
software tools, lexical sentiment and corpora
resources, classification models, and applications of
opinion mining. It also presents future directions for
opinion mining in Arabic. The survey also covers latest
advances in the field, including deep learning advances
in Arabic Opinion Mining. The article provides
state-of-the-art information to help new or established
researchers in the field as well as industry developers
who aim to deploy an operational complete
opinion-mining system. Key insights are captured at the
end of each section for particular aspects of the
opinion-mining system giving the reader a choice of
focusing on particular aspects of interest.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "27",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Masmoudi:2019:ADR,
author = "Abir Masmoudi and Salima Mdhaffar and Rahma Sellami
and Lamia Hadrich Belguith",
title = "Automatic Diacritics Restoration for {Tunisian}
Dialect",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "28:1--28:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3297278",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3297278",
abstract = "Modern Standard Arabic, as well as Arabic dialect
languages, are usually written without diacritics. The
absence of these marks constitutes a real problem in the
automatic processing of these data by NLP tools.
Indeed, writing Arabic without diacritics introduces
several types of ambiguity. First, a word without
diacritics could have many possible meanings depending
on their diacritization. Second, undiacritized surface
forms of an Arabic word might have as many as 200
readings depending on the complexity of its morphology
[12]. In fact, the agglutination property of Arabic
might produce a problem that can only be resolved using
diacritics. Third, without diacritics a word could have
many possible parts of speech (POS) instead of one.
This is the case with the words that have the same
spelling and POS tag but a different lexical sense, or
words that have the same spelling but different POS
tags and lexical senses [8]. Finally, there is
ambiguity at the grammatical level (syntactic
ambiguity). In this article, we propose the first work
that investigates the automatic diacritization of
Tunisian Dialect texts. We first describe our
annotation guidelines and procedure. Then, we propose
two major models, namely a statistical machine
translation (SMT) and a discriminative model as a
sequence classification task based on Conditional
Random Fields (CRF). In the second approach, we
integrate POS features to influence the generation of
diacritics. Diacritics restoration was performed at
both the word and the character levels. The results
showed high scores of automatic diacritization based on
the CRF system (Word Error Rate (WER) 21.44\% for CRF
and WER 34.6\% for SMT).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "28",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Rudra:2019:IAD,
author = "Koustav Rudra and Ashish Sharma and Kalika Bali and
Monojit Choudhury and Niloy Ganguly",
title = "Identifying and Analyzing Different Aspects of
{English--Hindi} Code-Switching in {Twitter}",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "29:1--29:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314935",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314935",
abstract = "Code-switching or the juxtaposition of linguistic
units from two or more languages in a single utterance,
has, in recent times, become very common in text,
thanks to social media and other computer mediated
forms of communication. In this exploratory study of
English--Hindi code-switching on Twitter, we
automatically create a large corpus of code-switched
tweets and devise techniques to identify the
relationship between successive components in a
code-switched tweet. More specifically, we identify
pragmatic functions such as narrative-evaluative,
negative reinforcement, translation or semantically
equivalent statements, and so on characterizing the
relation between successive components. We analyze the
difference/similarity between switching patterns in
code-switched and monolingual multi-component tweets.
We observe strong dominance of narrative-evaluative
(non-opinion to opinion or vice versa) switching in
case of both code-switched and monolingual
multi-component tweets in around 40\% of cases.
Polarity switching appears to be a prevalent switching
phenomenon (10\%) specifically in code-switched tweets
(three to four times higher than monolingual
multi-component tweets) where preference of expressing
negative sentiment in Hindi is approximately twice
compared to English. Positive reinforcement appears to
be an important pragmatic function for English
multi-component tweets, whereas negative reinforcement
plays a key role for Devanagari multi-component tweets.
Our results also indicate that the extent and nature of
code-switching also strongly depend on the topic
(sports, politics, etc.) of discussion.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "29",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Verma:2019:CAH,
author = "Pradeepika Verma and Sukomal Pal and Hari Om",
title = "A Comparative Analysis on {Hindi} and {English}
Extractive Text Summarization",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "30:1--30:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3308754",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3308754",
abstract = "Text summarization is the process of transfiguring a
large documental information into a clear and concise
form. In this article, we present a detailed
comparative study of various extractive methods for
automatic text summarization on Hindi and English text
datasets of news articles. We consider 13 different
summarization techniques, namely, TextRank, LexRank,
Luhn, LSA, Edmundson, ChunkRank, TGraph, UniRank,
NN-ED, NN-SE, FE-SE, SummaRuNNer, and MMR-SE, and we
evaluate their performance using various performance
metrics, such as precision, recall, F$_1$, cohesion,
non-redundancy, readability, and significance. A
thorough analysis is done in eight different parts that
exhibits the strengths and limitations of these
methods, effect of performance over the summary length,
impact of language of a document, and other factors as
well. A standard summary evaluation tool (ROUGE) and
extensive programmatic evaluation using Python 3.5 in
Anaconda environment are used to evaluate their
outcome.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "30",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wei:2019:ROD,
author = "Bingzhen Wei and Xuancheng Ren and Yi Zhang and
Xiaoyan Cai and Qi Su and Xu Sun",
title = "Regularizing Output Distribution of Abstractive
{Chinese} Social Media Text Summarization for Improved
Semantic Consistency",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "31:1--31:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314934",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314934",
abstract = "Abstractive text summarization is a highly difficult
problem, and the sequence-to-sequence model has shown
success in improving the performance on the task.
However, the generated summaries are often inconsistent
with the source content in semantics. In such cases,
when generating summaries, the model selects
semantically unrelated words with respect to the source
content as the most probable output. The problem can be
attributed to heuristically constructed training data,
where summaries can be unrelated to the source content,
thus containing semantically unrelated words and
spurious word correspondence. In this article, we
propose a regularization approach for the
sequence-to-sequence model and make use of what the
model has learned to regularize the learning objective
to alleviate the effect of the problem. In addition, we
propose a practical human evaluation method to address
the problem that the existing automatic evaluation
method does not evaluate the semantic consistency with
the source content properly. Experimental results
demonstrate the effectiveness of the proposed approach,
which outperforms almost all the existing models.
Especially, the proposed approach improves the semantic
consistency by 4\% in terms of human evaluation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "31",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Trieu:2019:LAR,
author = "Hai-Long Trieu and Duc-Vu Tran and Ashwin Ittoo and
Le-Minh Nguyen",
title = "Leveraging Additional Resources for Improving
Statistical Machine Translation on {Asian} Low-Resource
Languages",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "32:1--32:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314936",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314936",
abstract = "Phrase-based machine translation (MT) systems require
large bilingual corpora for training. Nevertheless,
such large bilingual corpora are unavailable for most
language pairs in the world, causing a bottleneck for
the development of MT. For the Asian language
pairs---Japanese, Indonesian, Malay paired with
Vietnamese---they are also not excluded from the case, in
which there are no large bilingual corpora on these
low-resource language pairs. Furthermore, although the
languages are widely used in the world, there is no
prior work on MT, which causes an issue for the
development of MT on these languages. In this article,
we conducted an empirical study of leveraging
additional resources to improve MT for the Asian
low-resource language pairs: translation from Japanese,
Indonesian, and Malay to Vietnamese. We propose an
innovative approach that lies in two strategies of
building bilingual corpora from comparable data and
phrase pivot translation on existing bilingual corpora
of the languages paired with English. Bilingual corpora
were built from Wikipedia bilingual titles to enhance
bilingual data for the low-resource languages.
Additionally, we introduced a combined model of the
additional resources to create an effective solution to
improve MT on the Asian low-resource languages.
Experimental results show the effectiveness of our
systems with the improvement of +2 to +7 BLEU points.
This work contributes to the development of MT on
low-resource languages, especially opening a promising
direction for the progress of MT on the Asian language
pairs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "32",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Dehghan:2019:CDS,
author = "Mohammad Hossein Dehghan and Heshaam Faili",
title = "Converting Dependency Structure Into {Persian} Phrase
Structure",
journal = j-TALLIP,
volume = "18",
number = "3",
pages = "33:1--33:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314937",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:32 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314937",
abstract = "Treebank is one of the important and useful resources
in natural language processing represented in two
different annotated schemas: phrase and dependency
structures. There are many works that convert a phrase
structure into a dependency structure and vice versa.
Most of them are based that exploit the handcrafted
head percolation table and argument table in predefined
deterministic ways. In this article, we propose a
method to convert a dependency structure into a phrase
structure by enriching a trainable model of former
hybrid strategy approach. By adding a classifier to the
algorithm and using postprocessing modification, the
quality of conversion is increased. We evaluate our
method in two different languages, English and Persian,
and then analyze the errors. The results of our
experiments show a 46.01\% reduction of error rate in
English and 76.50\% for Persian compared to our
baseline. We build a new phrase structure treebank by
converting 10,000 sentences of Persian dependency
treebank into corresponding phrase structures and
correcting them manually.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "33",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Awais:2019:RDI,
author = "Muhammad Awais and Muhammad Shoaib",
title = "Role of Discourse Information in {Urdu} Sentiment
Classification: a Rule-based Method and
Machine-learning Technique",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "34:1--34:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3300050",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3300050",
abstract = "In computational linguistics, sentiment analysis
refers to the classification of opinions in a positive
class or a negative class. There exist a lot of
different methods for sentiment analysis of the English
language, but the literature lacks the availability of
methods and techniques for Urdu, which is the largely
spoken language in the South Asian sub-continent and
the national language of Pakistan. The currently
available techniques, such as adjective count method
known as Bag of Words (BoW), is not sufficient for
classification of complex sentiment written in the Urdu
language. Also, the performance of available
machine-learning techniques (with legacy features), for
classification of Urdu sentiments, are not comparable
with the achieved accuracy of other languages. In the
case of the English language, the discourse information
(sub-sentence-level information) boosts the performance
of both the BoW method and machine-learning techniques,
but there are very few works available that have tested
the context-level information for the sentiment
analysis of the Urdu language. This research aims to
extract the discourse information from the Urdu
sentiments and utilise the discourse information to
improve the performance and reduce the error rate of
existing techniques for Urdu Sentiment classification.
The proposed solution extracts the discourse
information, suggests a new set of features for
machine-learning techniques, and introduces a set of
rules to extend the capabilities of the BoW model. The
results show that the task has been enhanced
significantly and the performance metrics such as
recall, precision, and accuracy are increased by
31.25\%, 8.46\%, and 21.6\%, respectively. In future,
the proposed technique can be extended to sentiments
with more than two sub-opinions, such as for blogs,
reviews, and TV talk shows.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "34",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Nongmeikapam:2019:HMM,
author = "Kishorjit Nongmeikapam and Kanan Wahengbam and Oinam
Nickson Meetei and Themrichon Tuithung",
title = "Handwritten {Manipuri Meetei--Mayek} Classification
Using Convolutional Neural Network",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "35:1--35:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3309497",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3309497",
abstract = "A new technique for classifying all 56 different
characters of the Manipuri Meetei-Mayek (MMM) is
proposed herein. The characters are grouped under five
categories, which are Eeyek Eepee (original alphabets),
Lom Eeyek (additional letters), Cheising Eeyek
(digits), Lonsum Eeyek (letters with short endings),
and Cheitap Eeyek (vowel signs). Two related works
proposed by previous researchers are studied for
understanding the benefits claimed by the proposed deep
learning approach in handwritten Manipuri Meetei-Mayek.
(1) Histogram of Oriented Gradients (HOG) with SVM classifier
is implemented for thoroughly understanding how HOG
features can influence accuracy. (2) The handwritten
samples are trained using simple Convolutional Neural
Network (CNN) and compared with the proposed CNN-based
architecture. Significant progress has been made in the
field of Optical Character Recognition (OCR) for
well-known Indian languages as well as globally popular
languages. Our work is novel in the sense that there is
no record of work available to date that is able to
classify all 56 classes of the MMM. It will also serve
as a pre-cursor for developing end-to-end OCR software
for translating old manuscripts, newspaper archives,
books, and so on.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "35",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Gao:2019:SBC,
author = "Shengxiang Gao and Jihao Huang and Mingya Xue and
Zhengtao Yu and Zhuo Wang and Yang Zhang",
title = "Syntax-Based {Chinese--Vietnamese} Tree-to-Tree
Statistical Machine Translation with Bilingual
Features",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "36:1--36:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314938",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314938",
abstract = "Because of the scarcity of bilingual corpora, current
Chinese--Vietnamese machine translation is far from
satisfactory. Considering the differences between
Chinese and Vietnamese, we investigate whether
linguistic differences can be used to supervise machine
translation and propose a method of syntax-based
Chinese--Vietnamese tree-to-tree statistical machine
translation with bilingual features. Analyzing the
syntax differences between Chinese and Vietnamese, we
define some linguistic difference-based rules, such as
attributive position, time adverbial position, and
locative adverbial position, and create rewards for
similar rules. These rewards are integrated into the
extraction of tree-to-tree translation rules, and we
optimize the pruning of the search space during the
decoding phase. The experiments on Chinese--Vietnamese
bilingual sentence translation show that the proposed
method performs better than several compared methods.
Further, the results show that syntactic difference
features, with search pruning, can improve the accuracy
of machine translation without degrading the
efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "36",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sun:2019:NSP,
author = "Ruiyong Sun and Yijia Zhao and Qi Zhang and Keyu Ding
and Shijin Wang and Cui Wei",
title = "A Neural Semantic Parser for Math Problems
Incorporating Multi-Sentence Information",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "37:1--37:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314939",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314939",
abstract = "In this article, we study the problem of parsing a
math problem into logical forms. It is an essential
pre-processing step for automatically solving math
problems. Most of the existing studies about semantic
parsing mainly focused on the single-sentence level.
However, for parsing math problems, we need to take the
information of multiple sentences into consideration.
To achieve the task, we formulate the task as a machine
translation problem and extend the sequence-to-sequence
model with a novel two-encoder architecture and a
word-level selective mechanism. For training and
evaluating the proposed method, we construct a
large-scale dataset. Experimental results show that the
proposed two-encoder architecture and word-level
selective mechanism could bring significant
improvement. The proposed method can achieve better
performance than the state-of-the-art methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "37",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Maimaiti:2019:MRT,
author = "Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan
and Maosong Sun",
title = "Multi-Round Transfer Learning for Low-Resource {NMT}
Using Multiple High-Resource Languages",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "38:1--38:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314945",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314945",
abstract = "Neural machine translation (NMT) has made remarkable
progress in recent years, but the performance of NMT
suffers from a data sparsity problem since large-scale
parallel corpora are only readily available for
high-resource languages (HRLs). In recent days,
transfer learning (TL) has been used widely in
low-resource languages (LRLs) machine translation,
while TL is becoming one of the vital directions for
addressing the data sparsity problem in low-resource
NMT. As a solution, a transfer learning method in NMT
is generally obtained via initializing the low-resource
model (child) with the high-resource model (parent).
However, leveraging the original TL to low-resource
models is neither able to make full use of highly
related multiple HRLs nor to receive different
parameters from the same parents. In order to exploit
multiple HRLs effectively, we present a
language-independent and straightforward multi-round
transfer learning (MRTL) approach to low-resource NMT.
Besides, with the intention of reducing the differences
between high-resource and low-resource languages at the
character level, we introduce a unified transliteration
method for various language families, which are both
semantically and syntactically highly analogous with
each other. Experiments on low-resource datasets show
that our approaches are effective, significantly
outperform the state-of-the-art methods, and yield
improvements of up to 5.63 BLEU points.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "38",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ihasz:2019:SFS,
author = "Peter Lajos Ihasz and Mate Kovacs and Ian Piumarta and
Victor V. Kryssanov",
title = "A Supplementary Feature Set for Sentiment Analysis in
{Japanese} Dialogues",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "39:1--39:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310283",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310283",
abstract = "Recently, real-time affect-awareness has been applied
in several commercial systems, such as dialogue systems
and computer games. Real-time recognition of affective
states, however, requires the application of costly
feature extraction methods and/or labor-intensive
annotation of large datasets, especially in the case of
Asian languages where large annotated datasets are
seldom available. To improve recognition accuracy, we
propose the use of cognitive context in the form of
``emotion-sensitive'' intentions. Intentions are often
represented through dialogue acts and, as an
emotion-sensitive model of dialogue acts, a tagset of
interpersonal-relations-directing interpersonal acts
(the IA model) is proposed. The model's adequacy is
assessed using a sentiment classification task in
comparison with two well-known dialogue act models, the
SWBD-DAMSL and the DIT++. For the assessment, five
Japanese in-game dialogues were annotated with labels
of sentiments and the tags of all three dialogue act
models which were used to enhance a baseline sentiment
classifier system. The adequacy of the IA tagset is
demonstrated by a 9\% improvement to the baseline
sentiment classifier's recognition accuracy,
outperforming the other two models by more than 5\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "39",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Saeed:2019:SAC,
author = "Ali Saeed and Rao Muhammad Adeel Nawab and Mark
Stevenson and Paul Rayson",
title = "A Sense Annotated Corpus for All-Words {Urdu} Word
Sense Disambiguation",
journal = j-TALLIP,
volume = "18",
number = "4",
pages = "40:1--40:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3314940",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed Oct 2 10:34:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3314940",
abstract = "Word Sense Disambiguation (WSD) aims to automatically
predict the correct sense of a word used in a given
context. All human languages exhibit word sense
ambiguity, and resolving this ambiguity can be
difficult. Standard benchmark resources are required to
develop, compare, and evaluate WSD techniques. These
are available for many languages, but not for Urdu,
despite this being a language with more than 300
million speakers and large volumes of text available
digitally. To fill this gap, this study proposes a
novel benchmark corpus for the Urdu All-Words WSD task.
The corpus contains 5,042 words of Urdu running text in
which all ambiguous words (856 instances) are manually
tagged with senses from the Urdu Lughat dictionary. A
range of baseline WSD models based on $n$-gram are
applied to the corpus, and the best performance
(accuracy of 57.71\%) is achieved using word 4-gram.
The corpus is freely available to the research
community to encourage further WSD research in Urdu.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "40",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Dahou:2019:MCE,
  author =       "Abdelghani Dahou and Shengwu Xiong and Junwei Zhou and
                 Mohamed Abd Elaziz",
  title =        "Multi-Channel Embedding Convolutional Neural Network
                 Model for {Arabic} Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "41:1--41:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314941",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314941",
  abstract =     "With the advent of social network services, Arabs'
                 opinions on the web have attracted many researchers in
                 recent years toward detecting and classifying
                 sentiments in Arabic tweets and reviews. However, the
                 impact of word embeddings vectors (WEVs) initialization
                 and dataset balance on Arabic sentiment classification
                 using deep learning has not been thoroughly studied. In
                 this article, a multi-channel embedding convolutional
                 neural network (MCE-CNN) is proposed to improve Arabic
                 sentiment classification by learning sentiment features
                 from different text domains, word, and character
                 n-grams levels. MCE-CNN encodes a combination of
                 different pre-trained word embeddings into the
                 embedding block at each embedding channel and trains
                 these channels in parallel. Besides, a separate feature
                 extraction module implemented in a CNN block is used to
                 extract more relevant sentiment features. These
                 channels and blocks help to start training on
                 high-quality WEVs and fine-tuning them. The performance
                 of MCE-CNN is evaluated on several standard balanced
                 and imbalanced datasets to reflect real-world use
                 cases. Experimental results show that MCE-CNN provides
                 a high classification accuracy and benefits from the
                 second embedding channel on both standard Arabic and
                 dialectal Arabic text, which outperforms
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Onyenwe:2019:TEI,
  author =       "Ikechukwu E. Onyenwe and Mark Hepple and Uchechukwu
                 Chinedu and Ignatius Ezeani",
  title =        "Toward an Effective {Igbo} Part-of-Speech Tagger",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "42:1--42:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314942",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314942",
  abstract =     "Part-of-speech (POS) tagging is a well-established
                 technology for most Western European languages and a
                 few other world languages, but it has not been
                 evaluated on Igbo, an agglutinative African language.
                 This article presents POS tagging experiments conducted
                 using an Igbo corpus as a test bed for identifying the
                 POS taggers and the Machine Learning (ML) methods that
                 can achieve a good performance with the small dataset
                 available for the language. Experiments have been
                 conducted using different well-known POS taggers
                 developed for English or European languages, and
                 different training data styles and sizes. Igbo has a
                 number of language-specific characteristics that
                 present a challenge for effective POS tagging. One
                 interesting case is the wide use of verbs (and
                 nominalizations thereof) that have an inherent noun
                 complement, which form ``linked pairs'' in the POS
                 tagging scheme, but which may appear discontinuously.
                 Another issue is Igbo's highly productive agglutinative
                 morphology, which can produce many variant word forms
                 from a given root. This productivity is a key cause of
                 the out-of-vocabulary (OOV) words observed during Igbo
                 tagging. We report results of experiments on a
                 promising direction for improving tagging performance
                 on such morphologically-inflected OOV words.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Costa-Jussa:2019:CCN,
  author =       "Marta R. Costa-Juss{\`a} and No{\'e} Casas and Carlos
                 Escolano and Jos{\'e} A. R. Fonollosa",
  title =        "{Chinese--Catalan}: a Neural Machine Translation
                 Approach Based on Pivoting and Attention Mechanisms",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "43:1--43:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3312575",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3312575",
  abstract =     "This article innovatively addresses machine
                 translation from Chinese to Catalan using neural pivot
                 strategies trained without any direct parallel data.
                 The Catalan language is very similar to Spanish from a
                 linguistic point of view, which motivates the use of
                 Spanish as pivot language. Regarding neural
                 architecture, we are using the latest state-of-the-art,
                 which is the Transformer model, only based on attention
                 mechanisms. Additionally, this work provides new
                 resources to the community, which consists of a
                 human-developed gold standard of 4,000 sentences
                 between Catalan and Chinese and all the others United
                 Nations official languages (Arabic, English, French,
                 Russian, and Spanish). Results show that the standard
                 pseudo-corpus or synthetic pivot approach performs
                 better than cascade.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yu:2019:MTE,
  author =       "Hui Yu and Weizhi Xu and Shouxun Lin and Qun Liu",
  title =        "Machine Translation Evaluation Metric Based on
                 Dependency Parsing Model",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "44:1--44:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3312573",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3312573",
  abstract =     "Most of the syntax-based metrics obtain the similarity
                 by comparing the sub-structures extracted from the
                 trees of hypothesis and reference. These sub-structures
                 cannot represent all the information in the trees
                 because their lengths are limited. To sufficiently use
                 the reference syntax information, a new automatic
                 evaluation metric is proposed based on the dependency
                 parsing model. First, a dependency parsing model is
                 trained using the reference dependency tree for each
                 sentence. Then, the hypothesis is parsed by this
                 dependency parsing model and the corresponding
                 hypothesis dependency tree is generated. The quality of
                 hypothesis can be judged by the quality of the
                 hypothesis dependency tree. Unigram F-score is included
                 in the new metric so that lexicon similarity is
                 obtained. According to experimental results, the
                 proposed metric can perform better than METEOR and BLEU
                 on system level and get comparable results with METEOR
                 on sentence level. To further improve the performance,
                 we also propose a combined metric which gets the best
                 performance on the sentence level and on the system
                 level.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2019:EBC,
  author =       "Yang Liu and Shaonan Wang and Jiajun Zhang and
                 Chengqing Zong",
  title =        "Experience-based Causality Learning for Intelligent
                 Agents",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "45:1--45:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314943",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314943",
  abstract =     "Understanding causality in text is crucial for
                 intelligent agents. In this article, inspired by human
                 causality learning, we propose an experience-based
                 causality learning framework. Comparing to traditional
                 approaches, which attempt to handle the causality
                 problem relying on textual clues and linguistic
                 resources, we are the first to use experience
                 information for causality learning. Specifically, we
                 first construct various scenarios for intelligent
                 agents, thus, the agents can gain experience from
                 interaction in these scenarios. Then, human
                 participants build a number of training instances for
                 agents of causality learning based on these scenarios.
                 Each instance contains two sentences and a label. Each
                 sentence describes an event that an agent experienced
                 in a scenario, and the label indicates whether the
                 sentence (event) pair has a causal relation.
                 Accordingly, we propose a model that can infer the
                 causality in text using experience by accessing the
                 corresponding event information based on the input
                 sentence pair. Experiment results show that our method
                 can achieve impressive performance on the grounded
                 causality corpus and significantly outperform the
                 conventional approaches. Our work suggests that
                 experience is very important for intelligent agents to
                 understand causality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yin:2019:PTE,
  author =       "Yongjing Yin and Jinsong Su and Huating Wen and Jiali
                 Zeng and Yang Liu and Yidong Chen",
  title =        "{POS} Tag-enhanced Coarse-to-fine Attention for Neural
                 Machine Translation",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "46:1--46:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321124",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3321124",
  abstract =     "Although neural machine translation (NMT) has certain
                 capability to implicitly learn semantic information of
                 sentences, we explore and show that Part-of-Speech
                 (POS) tags can be explicitly incorporated into the
                 attention mechanism of NMT effectively to yield further
                 improvements. In this article, we propose an NMT model
                 with tag-enhanced attention mechanism. In our model,
                 NMT and POS tagging are jointly modeled via multi-task
                 learning. Besides following common practice to enrich
                 encoder annotations by introducing predicted source POS
                 tags, we exploit predicted target POS tags to refine
                 attention model in a coarse-to-fine manner.
                 Specifically, we first implement a coarse attention
                 operation solely on source annotations and target
                 hidden state, where the produced context vector is
                 applied to update target hidden state used for target
                 POS tagging. Then, we perform a fine attention
                 operation that extends the coarse one by further
                 exploiting the predicted target POS tags. Finally, we
                 facilitate word prediction by simultaneously utilizing
                 the context vector from fine attention and the
                 predicted target POS tags. Experimental results and
                 further analyses on Chinese--English and
                 Japanese--English translation tasks demonstrate the
                 superiority of our proposed model over the conventional
                 NMT models. We release our code at
                 https://github.com/middlekisser/PEA-NMT.git.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2019:MEA,
  author =       "Jun Yang and Runqi Yang and Hengyang Lu and Chongjun
                 Wang and Junyuan Xie",
  title =        "Multi-Entity Aspect-Based Sentiment Analysis with
                 Context, Entity, Aspect Memory and Dependency
                 Information",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "47:1--47:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321125",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3321125",
  abstract =     "Fine-grained sentiment analysis is a useful tool for
                 producers to understand consumers' needs as well as
                 complaints about products and related aspects from
                 online platforms. In this article, we define a novel
                 task named ``Multi-Entity Aspect-Based Sentiment
                 Analysis (ME-ABSA)''. It investigates the sentiment
                 towards entities and their related aspects. It makes
                 the well-studied aspect-based sentiment analysis a
                 special case of this type, where the number of entities
                 is limited to one. We contribute a new dataset for this
                 task, with multi-entity Chinese posts in it. We propose
                 to model context, entity, and aspect memory to address
                 the task and incorporate dependency information for
                 further improvement. Experiments show that our methods
                 perform significantly better than baseline methods on
                 datasets for both ME-ABSA task and ABSA task. The
                 in-depth analysis further validates the effectiveness
                 of our methods and shows that our methods are capable
                 of generalizing to new (entity, aspect) combinations
                 with little loss of accuracy. This observation
                 indicates that data annotation in real applications can
                 be largely simplified.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kim:2019:MTS,
  author =       "Hyun Kim and Jong-Hyeok Lee and Seung-Hoon Na",
  title =        "Multi-task Stack Propagation for Neural Quality
                 Estimation",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "48:1--48:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321127",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3321127",
  abstract =     "Quality estimation is an important task in machine
                 translation that has attracted increased interest in
                 recent years. A key problem in translation-quality
                 estimation is the lack of a sufficient amount of the
                 quality annotated training data. To address this
                 shortcoming, the Predictor-Estimator was proposed
                 recently by introducing ``word prediction'' as an
                 additional pre-subtask that predicts a current target
                 word with consideration of surrounding source and
                 target contexts, resulting in a two-stage neural model
                 composed of a predictor and an estimator. However, the
                 original Predictor-Estimator is not trained on a
                 continuous stacking model but instead in a cascaded
                 manner that separately trains the predictor from the
                 estimator. In addition, the Predictor-Estimator is
                 trained based on single-task learning only, which uses
                 target-specific quality-estimation data without using
                 other training data that are available from other-level
                 quality-estimation tasks. In this article, we thus
                 propose a multi-task stack propagation, which
                 extensively applies stack propagation to fully train
                 the Predictor-Estimator on a continuous stacking
                 architecture and multi-task learning to enhance the
                 training data from related other-level
                 quality-estimation tasks. Experimental results on WMT17
                 quality-estimation datasets show that the
                 Predictor-Estimator trained with multi-task stack
                 propagation provides statistically significant
                 improvements over the baseline models. In particular,
                 under an ensemble setting, the proposed multi-task
                 stack propagation leads to state-of-the-art performance
                 at all the sentence/word/phrase levels for WMT17
                 quality estimation tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2020:GCL,
  author =       "Hongmin Wang and Jie Yang and Yue Zhang",
  title =        "From {Genesis} to {Creole} Language: Transfer Learning
                 for {Singlish} Universal Dependencies Parsing and {POS}
                 Tagging",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "1:1--1:29",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321128",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3321128",
  abstract =     "Singlish can be interesting to the computational
                 linguistics community both linguistically, as a major
                 low-resource creole based on English, and
                 computationally, for information extraction and
                 sentiment analysis of regional social media. In our
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kong:2020:CZP,
  author =       "Fang Kong and Min Zhang and Guodong Zhou",
  title =        "{Chinese} Zero Pronoun Resolution: a Chain-to-chain
                 Approach",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "2:1--2:21",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321129",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3321129",
  abstract =     "Chinese zero pronoun (ZP) resolution plays a critical
                 role in discourse analysis. Different from traditional
                 mention-to-mention approaches, this article proposes a
                 chain-to-chain approach to improve the performance of
                 ZP resolution in three aspects. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yin:2020:CZP,
  author =       "Qingyu Yin and Weinan Zhang and Yu Zhang and Ting
                 Liu",
  title =        "{Chinese} Zero Pronoun Resolution: a Collaborative
                 Filtering-based Approach",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "3:1--3:20",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325884",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325884",
  abstract =     "Semantic information that has been proven to be
                 necessary to the resolution of common noun phrases is
                 typically ignored by most existing Chinese zero pronoun
                 resolvers. This is because that zero pronouns convey no
                 descriptive information, which makes it \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Das:2020:TCT,
  author =       "Ayan Das and Sudeshna Sarkar",
  title =        "Transform, Combine, and Transfer: Delexicalized
                 Transfer Parser for Low-resource Languages",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "4:1--4:30",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325886",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325886",
  abstract =     "Transfer parsing has been used for developing
                 dependency parsers for languages with no treebank by
                 using transfer from treebanks of other languages
                 (source languages). In delexicalized transfer, parsed
                 words are replaced by their part-of-speech tags.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Ding:2020:TBM,
  author =       "Chenchen Ding and Hnin Thu Zar Aye and Win Pa Pa and
                 Khin Thandar Nwet and Khin Mar Soe and Masao Utiyama
                 and Eiichiro Sumita",
  title =        "Towards {Burmese} ({Myanmar}) Morphological Analysis:
                 Syllable-based Tokenization and Part-of-speech
                 Tagging",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "5:1--5:34",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325885",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325885",
  abstract =     "This article presents a comprehensive study on two
                 primary tasks in Burmese (Myanmar) morphological
                 analysis: tokenization and part-of-speech (POS)
                 tagging. Twenty thousand Burmese sentences of newswire
                 are annotated with two-layer tokenization and
                 POS-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2020:AMC,
  author =       "Dayiheng Liu and Kexin Yang and Qian Qu and Jiancheng
                 Lv",
  title =        "Ancient--Modern {Chinese} Translation with a New Large
                 Training Dataset",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "6:1--6:13",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325887",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325887",
  abstract =     "Ancient Chinese brings the wisdom and spirit culture
                 of the Chinese nation. Automatic translation from
                 ancient Chinese to modern Chinese helps to inherit and
                 carry forward the quintessence of the ancients.
                 However, the lack of large-scale parallel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2020:CSP,
  author =       "Wei Wang and Degen Huang and Jingxiang Cao",
  title =        "{Chinese} Syntax Parsing Based on Sliding Match of
                 Semantic String",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "7:1--7:14",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329707",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329707",
  abstract =     "Different from the current syntax parsing based on
                 deep learning, we present a novel Chinese parsing
                 method, which is based on Sliding Match of Semantic
                 String (SMOSS). (1) Training stage: In a treebank,
                 headwords of tree nodes are represented by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kanwal:2020:UNE,
  author =       "Safia Kanwal and Kamran Malik and Khurram Shahzad and
                 Faisal Aslam and Zubair Nawaz",
  title =        "{Urdu} Named Entity Recognition: Corpus Generation and
                 Deep Learning Applications",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "8:1--8:13",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329710",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329710",
  abstract =     "Named Entity Recognition (NER) plays a pivotal role in
                 various natural language processing tasks, such as
                 machine translation and automatic question-answering
                 systems. Recognizing the importance of NER, a plethora
                 of NER techniques for Western and Asian \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2020:DCW,
  author =       "Yijia Liu and Wanxiang Che and Yuxuan Wang and Bo
                 Zheng and Bing Qin and Ting Liu",
  title =        "Deep Contextualized Word Embeddings for Universal
                 Dependency Parsing",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "9:1--9:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3326497",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3326497",
  abstract =     "Deep contextualized word embeddings (Embeddings from
                 Language Model, short for ELMo), as an emerging and
                 effective replacement for the static word embeddings,
                 have achieved success on a bunch of syntactic and
                 semantic NLP problems. However, little is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Mehmood:2020:SAR,
  author =       "Khawar Mehmood and Daryl Essam and Kamran Shafi and
                 Muhammad Kamran Malik",
  title =        "Sentiment Analysis for a Resource Poor Language ---
                 {Roman Urdu}",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "10:1--10:15",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329709",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329709",
  abstract =     "Sentiment analysis is an important sub-task of Natural
                 Language Processing that aims to determine the polarity
                 of a review. Most of the work done on sentiment
                 analysis is for the resource-rich languages of the
                 world, but very limited work has been done \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Bakhshaei:2020:MGM,
  author =       "Somayeh Bakhshaei and Reza Safabakhsh and Shahram
                 Khadivi",
  title =        "Matching Graph, a Method for Extracting Parallel
                 Information from Comparable Corpora",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "11:1--11:29",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329713",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329713",
  abstract =     "Comparable corpora are valuable alternatives for the
                 expensive parallel corpora. They comprise informative
                 parallel fragments that are useful resources for
                 different natural language processing tasks. In this
                 work, a generative model is proposed for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2020:FTV,
  author =       "Dayiheng Liu and Yang Xue and Feng He and Yuanyuan
                 Chen and Jiancheng Lv",
  title =        "{$ \mu $}-Forcing: Training Variational Recurrent
                 Autoencoders for Text Generation",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "12:1--12:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3341110",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3341110",
  abstract =     "It has been previously observed that training
                 Variational Recurrent Autoencoders (VRAE) for text
                 generation suffers from serious uninformative latent
                 variables problems. The model would collapse into a
                 plain language model that totally ignores the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Srivastava:2020:AMA,
  author =       "Jyoti Srivastava and Sudip Sanyal and Ashish Kumar
                 Srivastava",
  title =        "An Automatic and a Machine-assisted Method to Clean
                 Bilingual Corpus",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "13:1--13:19",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342351",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342351",
  abstract =     "Two different methods of corpus cleaning are presented
                 in this article. One is a machine-assisted technique,
                 which is good to clean small-sized parallel corpus, and
                 the other is an automatic method, which is suitable for
                 cleaning large-sized parallel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Prakash:2020:ISP,
  author =       "Jeena J. Prakash and Golda Brunet Rajan and Hema A.
                 Murthy",
  title =        "Importance of Signal Processing Cues in Transcription
                 Correction for Low-Resource {Indian} Languages",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "14:1--14:26",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342352",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342352",
  abstract =     "Accurate phonetic transcriptions are crucial for
                 building robust acoustic models for speech recognition
                 as well as speech synthesis applications. Phonetic
                 transcriptions are not usually provided with speech
                 corpora. A lexicon is used to generate phone-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Han:2020:EMW,
  author =       "Dong Han and Junhui Li and Yachao Li and Min Zhang and
                 Guodong Zhou",
  title =        "Explicitly Modeling Word Translations in Neural
                 Machine Translation",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "15:1--15:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342353",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342353",
  abstract =     "In this article, we show that word translations can be
                 explicitly incorporated into NMT effectively to avoid
                 wrong translations. Specifically, we propose three
                 cross-lingual encoders to explicitly incorporate word
                 translations into NMT: (1) Factored\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Chakrabarty:2020:NNM,
author = "Abhisek Chakrabarty and Akshay Chaturvedi and Utpal
Garain",
title = "{NeuMorph}: Neural Morphological Tagging for
Low-Resource Languages --- an Experimental Study for
{Indic} Languages",
journal = j-TALLIP,
volume = "19",
number = "1",
pages = "16:1--16:19",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3342354",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Jan 10 08:11:41 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342354",
abstract = "This article deals with morphological tagging for
low-resource languages. For this purpose, five Indic
languages are taken as reference. In addition, two
severely resource-poor languages, Coptic and Kurmanji,
are also considered. The task entails \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "16",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ji:2020:ATU,
author = "Yatu Ji and Hongxu Hou and Junjie Chen and Nier Wu",
title = "Adversarial Training for Unknown Word Problems in
Neural Machine Translation",
journal = j-TALLIP,
volume = "19",
number = "1",
pages = "17:1--17:12",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3342482",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Jan 10 08:11:41 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342482",
abstract = "Nearly all of the work in neural machine translation
(NMT) is limited to a quite restricted vocabulary,
crudely treating all other words the same as an unk
symbol. For the translation of language with abundant
morphology, unknown (UNK) words also \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhu:2020:OSK,
  author          = {Qingfu Zhu and Weinan Zhang and Lei Cui and Ting Liu},
  title           = {Order-Sensitive Keywords Based Response Generation in
                     Open-Domain Conversational Systems},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {18:1--18:18},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3343258},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3343258},
  abstract        = {External keywords are crucial for response generation
                     models to address the generic response problems in
                     open-domain conversational systems. The occurrence of
                     keywords in a response depends heavily on the order of
                     the keywords as they are generated \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 18,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Zhou:2020:NCG,
  author          = {Guangyou Zhou and Yizhen Fang and Yehong Peng and
                     Jiaheng Lu},
  title           = {Neural Conversation Generation with Auxiliary
                     Emotional Supervised Models},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {19:1--19:17},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3344788},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3344788},
  abstract        = {An important aspect of developing dialogue agents
                     involves endowing a conversation system with emotion
                     perception and interaction. Most existing emotion
                     dialogue models lack the adaptability and extensibility
                     of different scenes because of their \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 19,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Zhu:2020:EDC,
  author          = {Wenhao Zhu and Xin Jin and Shuang Liu and Zhiguo Lu
                     and Wu Zhang and Ke Yan and Baogang Wei},
  title           = {Enhanced Double-Carrier Word Embedding via Phonetics
                     and Writing},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {20:1--20:18},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3344920},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3344920},
  abstract        = {Word embeddings, which map words into a unified vector
                     space, capture rich semantic information. From a
                     linguistic point of view, words have two carriers,
                     speech and writing. Yet the most recent word embedding
                     models focus on only the writing carrier \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 20,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Dehkharghani:2020:SPP,
  author          = {Rahim Dehkharghani},
  title           = {{SentiFars}: a {Persian} Polarity Lexicon for
                     Sentiment Analysis},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {21:1--21:12},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3345627},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3345627},
  abstract        = {There is no doubt about the usefulness of public
                     opinion toward different issues in social media and the
                     World Wide Web. Extracting the feelings of people about
                     an issue from text is not straightforward. Polarity
                     lexicons that assign polarity tags or \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 21,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Abdulhameed:2020:WVT,
  author          = {Tiba Zaki Abdulhameed and Imed Zitouni and Ikhlas
                     Abdel-Qader},
  title           = {{Wasf-Vec}: Topology-based Word Embedding for Modern
                     Standard {Arabic} and {Iraqi} Dialect Ontology},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {22:1--22:27},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3345517},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3345517},
  abstract        = {Word clustering is a serious challenge in low-resource
                     languages. Since words that share semantics are
                     expected to be clustered together, it is common to use
                     a feature vector representation generated from a
                     distributional theory-based word embedding \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 22,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Xu:2020:EPS,
  author          = {Ge Xu and Xiaoyan Yang and Yuanzheng Cai and Zhiqiang
                     Ruan and Tao Wang and Xiangwen Liao},
  title           = {Extracting Polarity Shifting Patterns from Any Corpus
                     Based on Natural Annotation},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {23:1--23:16},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3345518},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3345518},
  abstract        = {In recent years, online sentiment texts are generated
                     by users in various domains and in different languages.
                     Binary polarity classification (positive or negative)
                     on business sentiment texts can help both companies and
                     customers to evaluate products or \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 23,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Imankulova:2020:FPP,
  author          = {Aizhan Imankulova and Takayuki Sato and Mamoru
                     Komachi},
  title           = {Filtered Pseudo-parallel Corpus Improves Low-resource
                     Neural Machine Translation},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {24:1--24:16},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3341726},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3341726},
  abstract        = {Large-scale parallel corpora are essential for
                     training high-quality machine translation systems;
                     however, such corpora are not freely available for many
                     language translation pairs. Previously, training data
                     has been augmented by pseudo-parallel corpora
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 24,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Gupta:2020:DNN,
  author          = {Deepak Gupta and Asif Ekbal and Pushpak
                     Bhattacharyya},
  title           = {A Deep Neural Network Framework for {English} {Hindi}
                     Question Answering},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {25:1--25:22},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3359988},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3359988},
  abstract        = {In this article, we propose a unified deep neural
                     network framework for multilingual question answering
                     (QA). The proposed network deals with the multilingual
                     questions and answers snippets. The input to the
                     network is a pair of factoid question and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 25,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Yu:2020:LWT,
  author          = {Hongfei Yu and Xiaoqing Zhou and Xiangyu Duan and Min
                     Zhang},
  title           = {Layer-Wise De-Training and Re-Training for {ConvS2S}
                     Machine Translation},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {26:1--26:15},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3358414},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3358414},
  abstract        = {The convolutional sequence-to-sequence (ConvS2S)
                     machine translation system is one of the typical neural
                     machine translation (NMT) systems. Training the ConvS2S
                     model tends to get stuck in a local optimum in our
                     pre-studies. To overcome this inferior \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 26,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Somsap:2020:IDW,
  author          = {Sittichai Somsap and Pusadee Seresangtakul},
  title           = {{Isarn Dharma} Word Segmentation Using a Statistical
                     Approach with Named Entity Recognition},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {27:1--27:16},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3359990},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3359990},
  abstract        = {In this study, we developed an Isarn Dharma word
                     segmentation system. We mainly focused on solving the
                     word ambiguity and unknown word problems in unsegmented
                     Isarn Dharma text. Ambiguous Isarn Dharma words occur
                     frequently in word construction due to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 27,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Abbas:2020:PIR,
author = "Muhammad Raihan Abbas and Khadim Hussain Asif",
title = "{Punjabi} to {ISO 15919} and {Roman} Transliteration
with Phonetic Rectification",
journal = j-TALLIP,
volume = "19",
number = "2",
pages = "28:1--28:20",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3359991",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Mar 3 09:05:40 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3359991",
abstract = "Transliteration removes the script barriers.
Unfortunately, Punjabi is written in four different
scripts, i.e., Gurmukhi, Shahmukhi, Devnagri, and
Latin. The Latin script is understandable for nearly
all factions of the Punjabi community. The objective
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "28",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Beseiso:2020:SAM,
  author          = {Majdi Beseiso and Haytham Elmousalami},
  title           = {Subword Attentive Model for {Arabic} Sentiment
                     Analysis: a Deep Learning Approach},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {29:1--29:17},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3360016},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3360016},
  abstract        = {Social media data is unstructured data where these big
                     data are exponentially increasing day to day in many
                     different disciplines. Analysis and understanding the
                     semantics of these data are a big challenge due to its
                     variety and huge volume. To address \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 29,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Harikrishna:2020:CSC,
  author          = {D. M. Harikrishna and K. Sreenivasa Rao},
  title           = {{Children}'s Story Classification in {Indian}
                     Languages Using Linguistic and Keyword-based Features},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {30:1--30:22},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3342356},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3342356},
  abstract        = {The primary objective of this work is to classify
                     Hindi and Telugu stories into three genres: fable,
                     folk-tale, and legend. In this work, we are proposing a
                     framework for story classification (SC) using keyword
                     and part-of-speech (POS) features. For \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 30,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Jung:2020:WRT,
  author          = {Hun-Young Jung and Jong-Hyeok Lee and Eunju Min and
                     Seung-Hoon Na},
  title           = {Word Reordering for Translation into {Korean} Sign
                     Language Using Syntactically-guided Classification},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {31:1--31:20},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3357612},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3357612},
  abstract        = {Machine translation aims to break the language barrier
                     that prevents communication with others and increase
                     access to information. Deaf people face huge language
                     barriers in their daily lives, including access to
                     digital and spoken information. There \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 31,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Masmoudi:2020:TAA,
  author          = {Abir Masmoudi and Mariem Ellouze Khmekhem and Mourad
                     Khrouf and Lamia Hadrich Belguith},
  title           = {Transliteration of {Arabizi} into {Arabic} Script for
                     {Tunisian} Dialect},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {32:1--32:21},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3364319},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3364319},
  abstract        = {The evolution of information and communication
                     technology has markedly influenced communication
                     between correspondents. This evolution has facilitated
                     the transmission of information and has engendered new
                     forms of written communication (email, chat,
                     \ldots{})},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 32,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Mukherjee:2020:FST,
  author          = {Subham Mukherjee and Pradeep Kumar and Partha Pratim
                     Roy},
  title           = {Fusion of Spatio-temporal Information for {Indic} Word
                     Recognition Combining Online and Offline Text Data},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 2,
  pages           = {33:1--33:24},
  month           = mar,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3364533},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3364533},
  abstract        = {We present a novel Indic handwritten word recognition
                     scheme by fusion of spatio-temporal information
                     extracted from handwritten images. The main challenge
                     in Indic word recognition lies in its complexity
                     because of modifiers, touching characters, and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 33,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Yu:2020:ELR,
  author          = {Zhiqiang Yu and Zhengtao Yu and Junjun Guo and Yuxin
                     Huang and Yonghua Wen},
  title           = {Efficient Low-Resource Neural Machine Translation with
                     Reread and Feedback Mechanism},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {34:1--34:13},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3365244},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3365244},
  abstract        = {How to utilize information sufficiently is a key
                     problem in neural machine translation (NMT), which is
                     effectively improved in rich-resource NMT by leveraging
                     large-scale bilingual sentence pairs. However, for
                     low-resource NMT, lack of bilingual \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 34,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Park:2020:NSB,
  author          = {Cheoneum Park and Heejun Song and Changki Lee},
  title           = {{$ S^3$-NET}: {SRU}-Based Sentence and Self-Matching
                     Networks for Machine Reading Comprehension},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {35:1--35:14},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3365679},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3365679},
  abstract        = {Machine reading comprehension question answering
                     (MRC-QA) is the task of understanding the context of a
                     given passage to find a correct answer within it. A
                     passage is composed of several sentences; therefore,
                     the length of the input sentence becomes \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 35,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Sarwar:2020:SSF,
author = "Raheem Sarwar and Thanasarn Porthaveepong and Attapol
Rutherford and Thanawin Rakthanmanon and Sarana
Nutanong",
title = "{StyloThai}: a Scalable Framework for Stylometric
Authorship Identification of {Thai} Documents",
journal = j-TALLIP,
volume = "19",
number = "3",
pages = "36:1--36:15",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3365832",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Mar 3 09:11:26 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3365832",
abstract = "Authorship identification helps to identify the true
author of a given anonymous document from a set of
candidate authors. The applications of this task can be
found in several domains, such as law enforcement
agencies and information retrieval. These \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "36",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kim:2020:UIB,
  author          = {Hyun Kim and Seung-Hoon Na},
  title           = {Uniformly Interpolated Balancing for Robust Prediction
                     in Translation Quality Estimation: a Case Study of
                     {English--Korean} Translation},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {37:1--37:27},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3365916},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3365916},
  abstract        = {There has been growing interest among researchers in
                     quality estimation (QE), which attempts to
                     automatically predict the quality of machine
                     translation (MT) outputs. Most existing works on QE are
                     based on supervised approaches using quality-annotated
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 37,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Zhou:2020:LMU,
author = "Xiao Zhou and Zhen-Hua Ling and Li-Rong Dai",
title = "Learning and Modeling Unit Embeddings Using Deep
Neural Networks for Unit-Selection-Based {Mandarin}
Speech Synthesis",
journal = j-TALLIP,
volume = "19",
number = "3",
pages = "38:1--38:14",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372244",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Mar 3 09:11:26 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372244",
abstract = "A method of learning and modeling unit embeddings
using deep neural networks (DNNs) is presented in this
article for unit-selection-based Mandarin speech
synthesis. Here, a unit embedding is defined as a
fixed-length embedding vector for a phone-sized
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "38",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mirzaei:2020:SRL,
  author          = {Azadeh Mirzaei and Fatemeh Sedghi and Pegah Safari},
  title           = {Semantic Role Labeling System for {Persian} Language},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {39:1--39:12},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3372246},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3372246},
  abstract        = {In this article, we present an automatic semantic role
                     labeling system in Persian consisting of two modules:
                     argument identification for specifying argument spans
                     and argument classification for categorizing their
                     semantic roles. Our modules have been \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 39,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Ding:2020:BMT,
  author          = {Chenchen Ding and Sann Su Su Yee and Win Pa Pa and
                     Khin Mar Soe and Masao Utiyama and Eiichiro Sumita},
  title           = {A {Burmese} ({Myanmar}) {Treebank}: Guideline and
                     Analysis},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {40:1--40:13},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3373268},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3373268},
  abstract        = {A 20,000-sentence Burmese (Myanmar) treebank on news
                     articles has been released under a CC BY-NC-SA license.
                     Complete phrase structure annotation was developed for
                     each sentence from the morphologically annotated data
                     prepared in previous work of Ding \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 40,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Song:2020:KPS,
  author          = {Hyun-Je Song and Seong-Bae Park},
  title           = {{Korean} Part-of-speech Tagging Based on Morpheme
                     Generation},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {41:1--41:10},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3373608},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3373608},
  abstract        = {Two major problems of Korean part-of-speech (POS)
                     tagging are that the word-spacing unit is not mapped
                     one-to-one to a POS tag and that morphemes should be
                     recovered during POS tagging. Therefore, this article
                     proposes a novel two-step Korean POS tagger \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 41,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Mi:2020:LIL,
  author          = {Chenggang Mi and Lei Xie and Yanning Zhang},
  title           = {Loanword Identification in Low-Resource Languages with
                     Minimal Supervision},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {43:1--43:22},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3374212},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3374212},
  abstract        = {Bilingual resources play a very important role in many
                     natural language processing tasks, especially the tasks
                     in cross-lingual scenarios. However, it is expensive
                     and time consuming to build such resources. Lexical
                     borrowing happens in almost every \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 43,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Li:2020:INM,
  author          = {Yachao Li and Junhui Li and Min Zhang and Yixin Li and
                     Peng Zou},
  title           = {Improving Neural Machine Translation with Linear
                     Interpolation of a Short-Path Unit},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {44:1--44:16},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3377851},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3377851},
  abstract        = {In neural machine translation (NMT), the source and
                     target words are at the two ends of a large deep neural
                     network, normally mediated by a series of non-linear
                     activations. The problem with such consequent
                     non-linear activations is that they \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 44,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Liu:2020:DUK,
  author          = {Xiao-Yang Liu and Yimeng Zhang and Yukang Liao and
                     Ling Jiang},
  title           = {Dynamic Updating of the Knowledge Base for a
                     Large-Scale Question Answering System},
  journal         = j-TALLIP,
  volume          = 19,
  number          = 3,
  pages           = {45:1--45:13},
  month           = feb,
  year            = 2020,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3377708},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3377708},
  abstract        = {Today, the knowledge base question answering (KB-QA)
                     system is promising to achieve a large-scale
                     high-quality reply in the e-commerce industry. However,
                     there exist two major challenges to efficiently support
                     large-scale KB-QA systems. On the one hand, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = 45,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Liu:2020:ELM,
  author          = "Shih-Hung Liu and Kuan-Yu Chen and Berlin Chen",
  title           = "Enhanced Language Modeling with Proximity and Sentence
                     Relatedness Information for Extractive Broadcast News
                     Summarization",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "3",
  pages           = "46:1--46:19",
  month           = feb,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3377407",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Tue Mar 3 09:11:26 MST 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3377407",
  abstract        = "The primary task of extractive summarization is to
                     automatically select a set of representative sentences
                     from a text or spoken document that can concisely
                     express the most important theme of the original
                     document. Recently, language modeling (LM) has
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "46",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Du:2020:CNL,
  author          = "Qianlong Du and Chengqing Zong and Keh-Yih Su",
  title           = "Conducting Natural Language Inference with
                     Word-Pair-Dependency and Local Context",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "3",
  pages           = "47:1--47:23",
  month           = feb,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3377704",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Tue Mar 3 09:11:26 MST 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3377704",
  abstract        = "This article proposes to conduct natural language
                     inference with novel Enhanced-Relation-Head-Dependent
                     triplets (RHD triplets), which are constructed via
                     enhancing each word in the RHD triplet with its
                     associated local context. Most previous approaches
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "47",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Zitouni:2020:ENE,
  author          = "Imed Zitouni",
  title           = "Editorial from the New {Editor-in-Chief}: the Era of
                     Natural Language Processing Innovations on {Asian} and
                     Low-Resource Languages",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "48e:1--48e:2",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3397501",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3397501",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "48e",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2020:OEQ,
  author          = "Jingxuan Yang and Haotian Cui and Si Li and Sheng Gao
                     and Jun Guo and Zhengdong Lu",
  title           = "Outline Extraction with Question-Specific Memory
                     Cells",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "48:1--48:17",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3377707",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3377707",
  abstract        = "Outline extraction has been widely applied in online
                     consultation to help experts quickly understand
                     individual cases. Given a specific case described as
                     unstructured plain text, outline extraction aims to
                     make a summary for this case by answering a set
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "48",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Zarnoufi:2020:MNB,
  author          = "Randa Zarnoufi and Hamid Jaafar and Mounia Abik",
  title           = "Machine Normalization: Bringing Social Media Text from
                     Non-Standard to Standard Form",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "49:1--49:30",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3378414",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3378414",
  abstract        = "User-generated text in social media communication
                     (SMC) is mainly characterized by non-standard form. It
                     may contain code switching (CS) text, a widespread
                     phenomenon in SMC, in addition to noisy elements used,
                     especially in written conversations (use \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "49",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Bhattu:2020:ICM,
  author          = "S. Nagesh Bhattu and Satya Krishna Nunna and D. V. L.
                     N. Somayajulu and Binay Pradhan",
  title           = "Improving Code-mixed {POS} Tagging Using Code-mixed
                     Embeddings",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "50:1--50:31",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3380967",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3380967",
  abstract        = "Social media data has become invaluable component of
                     business analytics. A multitude of nuances of social
                     media text make the job of conventional text analytical
                     tools difficult. Code-mixing of text is a phenomenon
                     prevalent among social media users, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "50",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Ahmad:2020:NER,
  author          = "Muhammad Tayyab Ahmad and Muhammad Kamran Malik and
                     Khurram Shahzad and Faisal Aslam and Asif Iqbal and
                     Zubair Nawaz and Faisal Bukhari",
  title           = "Named Entity Recognition and Classification for
                     {Punjabi Shahmukhi}",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "51:1--51:13",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383306",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3383306",
  abstract        = "Named entity recognition (NER) refers to the
                     identification of proper nouns from natural language
                     text and classifying them into named entity types, such
                     as person, location, and organization. Due to the
                     widespread applications of NER, numerous NER \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "51",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Garg:2020:HES,
  author          = "Kanika Garg and D. K. Lobiyal",
  title           = "{Hindi EmotionNet}: a Scalable Emotion Lexicon for
                     Sentiment Classification of {Hindi} Text",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "52:1--52:35",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383330",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3383330",
  abstract        = "In this study, we create an emotion lexicon for the
                     Hindi language called Hindi EmotionNet. It can assign
                     emotional affinity to words in IndoWordNet. This
                     lexicon contains 3,839 emotion words, with 1,246
                     positive and 2,399 negative words. We also \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "52",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Udomcharoenchaikit:2020:AER,
  author          = "Can Udomcharoenchaikit and Prachya Boonkwan and
                     Peerapon Vateekul",
  title           = "Adversarial Evaluation of Robust Neural Sequential
                     Tagging Methods for {Thai} Language",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "53:1--53:25",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383201",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3383201",
  abstract        = "Sequential tagging tasks, such as Part-Of-Speech (POS)
                     tagging and Named-Entity Recognition, are the building
                     blocks of many natural language processing
                     applications. Although prior works have reported
                     promising results in standard settings, they often
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "53",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Sugandhi:2020:SLG,
  author          = "Sugandhi and Parteek Kumar and Sanmeet Kaur",
  title           = "Sign Language Generation System Based on {Indian} Sign
                     Language Grammar",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "54:1--54:26",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3384202",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3384202",
  abstract        = "Sign Language (SL), also known as gesture-based
                     language, is used by people with hearing loss to convey
                     their messages. SL interpreters are required for people
                     who do not have the knowledge of SL, but interpreters
                     are not readily available. Thus, a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "54",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Sarwar:2020:NLI,
  author          = "Raheem Sarwar and Attapol T. Rutherford and Saeed-Ul
                     Hassan and Thanawin Rakthanmanon and Sarana Nutanong",
  title           = "Native Language Identification of Fluent and Advanced
                     Non-Native Writers",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "55:1--55:19",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383202",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3383202",
  abstract        = "Native Language Identification (NLI) aims at
                     identifying the native languages of authors by
                     analyzing their text samples written in a non-native
                     language. Most existing studies investigate this task
                     for educational applications such as second language
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "55",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Buyuk:2020:CDS,
  author          = "Osman B{\"u}y{\"u}k",
  title           = "Context-Dependent Sequence-to-Sequence {Turkish}
                     Spelling Correction",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "56:1--56:16",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383200",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                     https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3383200",
  abstract        = "In this article, we make use of sequence-to-sequence
                     (seq2seq) models for spelling correction in the
                     agglutinative Turkish language. In the baseline system,
                     misspelled and target words are split into their
                     letters and the letter sequences are fed into
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "56",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Khalil:2020:EAC,
  author          = "Hussein Khalil and Taha Osman and Mohammed Miltan",
  title           = "Extracting {Arabic} Composite Names Using Genitive
                     Principles of {Arabic} Grammar",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "57:1--57:16",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3382187",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3382187",
  abstract        = "Named Entity Recognition (NER) is a basic prerequisite
                     of using Natural Language Processing (NLP) for
                     information retrieval. Arabic NER is especially
                     challenging as the language is morphologically rich and
                     has short vowels with no capitalisation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "57",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2020:SCH,
  author          = "Kexin Wang and Yu Zhou and Jiajun Zhang and Shaonan
                     Wang and Chengqing Zong",
  title           = "Structurally Comparative Hinge Loss for
                     Dependency-Based Neural Text Representation",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "58:1--58:19",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3387633",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3387633",
  abstract        = "Dependency-based graph convolutional networks
                     (DepGCNs) are proven helpful for text representation to
                     handle many natural language tasks. Almost all previous
                     models are trained with cross-entropy (CE) loss, which
                     maximizes the posterior likelihood \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "58",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2020:JME,
  author          = "Maofu Liu and Yukun Zhang and Wenjie Li and Donghong
                     Ji",
  title           = "Joint Model of Entity Recognition and Relation
                     Extraction with Self-attention Mechanism",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "59:1--59:19",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3387634",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3387634",
  abstract        = "In recent years, the joint model of entity recognition
                     (ER) and relation extraction (RE) has attracted more
                     and more attention in the healthcare and medical
                     domains. However, there are some problems with the
                     prior work. The joint model cannot extract \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "59",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Kumar:2020:LGV,
  author          = "H. R. Shiva Kumar and A. G. Ramakrishnan",
  title           = "{Lipi Gnani}: a Versatile {OCR} for Documents in any
                     Language Printed in {Kannada} Script",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "4",
  pages           = "60:1--60:23",
  month           = jul,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3387632",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Jul 8 18:31:46 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3387632",
  abstract        = "A Kannada OCR, called Lipi Gnani, has been designed
                     and developed from scratch, with the motivation of it
                     being able to convert printed text or poetry in Kannada
                     script, without any restriction on vocabulary. The
                     training and test sets have been \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "60",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Hao:2020:CST,
  author          = "Ming Hao and Bo Xu and Jing-Yi Liang and Bo-Wen Zhang
                     and Xu-Cheng Yin",
  title           = "{Chinese} Short Text Classification with
                     Mutual-Attention Convolutional Neural Networks",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "61:1--61:13",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3388970",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3388970",
  abstract        = "The methods based on the combination of word-level and
                     character-level features can effectively boost
                     performance on Chinese short text classification. A lot
                     of works concatenate two-level features with little
                     processing, which leads to losing feature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "61",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Xu:2020:SDE,
  author          = "Fan Xu and Jian Luo and Mingwen Wang and Guodong
                     Zhou",
  title           = "Speech-Driven End-to-End Language Discrimination
                     toward {Chinese} Dialects",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "62:1--62:24",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3389021",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3389021",
  abstract        = "Language discrimination among similar languages,
                     varieties, and dialects is a challenging natural
                     language processing task. The traditional text-driven
                     focus leads to poor results. In this article, we
                     explore the effectiveness of speech-driven features
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "62",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Chen:2020:IIF,
  author          = "Junjie Chen and Hongxu Hou and Jing Gao",
  title           = "Inside Importance Factors of Graph-Based Keyword
                     Extraction on {Chinese} Short Text",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "63:1--63:15",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3388971",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3388971",
  abstract        = "Keywords are considered to be important words in the
                     text and can provide a concise representation of the
                     text. With the surge of unlabeled short text on the
                     Internet, automatic keyword extraction task has proven
                     useful in other information processing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "63",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Lou:2020:EBS,
  author          = "Yinxia Lou and Yue Zhang and Fei Li and Tao Qian and
                     Donghong Ji",
  title           = "Emoji-Based Sentiment Analysis Using Attention
                     Networks",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "64:1--64:13",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3389035",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3389035",
  abstract        = "Emojis are frequently used to express moods, emotions,
                     and feelings in social media. There has been much
                     research on emojis and sentiments. However, existing
                     methods mainly face two limitations. First, they treat
                     emojis as binary indicator features and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "64",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Zhou:2020:DNN,
  author          = "Long Zhou and Jiajun Zhang and Xiaomian Kang and
                     Chengqing Zong",
  title           = "Deep Neural Network--based Machine Translation System
                     Combination",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "65:1--65:19",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3389791",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3389791",
  abstract        = "Deep neural networks (DNNs) have provably enhanced the
                     state-of-the-art natural language process (NLP) with
                     their capability of feature learning and
                     representation. As one of the more challenging NLP
                     tasks, neural machine translation (NMT) becomes a new
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "65",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
%%% The first author's family name is the compound "Hadj Ameur".  It is
%%% braced so that BibTeX sorts and abbreviates it as a single surname
%%% instead of treating "Hadj" as a given name.  The citation key is
%%% deliberately left unchanged so existing citations keep working.
@Article{Ameur:2020:RAT,
author = "Mohamed Seghir {Hadj Ameur} and Riadh Belkebir and Ahmed
Guessoum",
title = "Robust {Arabic} Text Categorization by Combining
Convolutional and Recurrent Neural Networks",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "66:1--66:16",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3390092",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3390092",
abstract = "Text Categorization is an important task in the area
of Natural Language Processing (NLP). Its goal is to
learn a model that can accurately classify any textual
document for a given language into one of a set of
predefined categories. In the context of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "66",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Das:2020:SMT,
  author          = "Ayan Das and Sudeshna Sarkar",
  title           = "A Survey of the Model Transfer Approaches to
                     Cross-Lingual Dependency Parsing",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "67:1--67:60",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3383772",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3383772",
  abstract        = "Cross-lingual dependency parsing approaches have been
                     employed to develop dependency parsers for the
                     languages for which little or no treebanks are
                     available using the treebanks of other languages. A
                     language for which the cross-lingual parser is
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "67",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Marie:2020:ITU,
  author          = "Benjamin Marie and Atsushi Fujita",
  title           = "Iterative Training of Unsupervised Neural and
                     Statistical Machine Translation Systems",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "68:1--68:21",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3389790",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3389790",
  abstract        = "Recent work achieved remarkable results in training
                     neural machine translation (NMT) systems in a fully
                     unsupervised way, with new and dedicated architectures
                     that only rely on monolingual corpora. However,
                     previous work also showed that unsupervised \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "68",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
@Article{Chimalamarri:2020:MSI,
  author          = "Santwana Chimalamarri and Dinkar Sitaram and Ashritha
                     Jain",
  title           = "Morphological Segmentation to Improve Crosslingual
                     Word Embeddings for Low Resource Languages",
  journal         = j-TALLIP,
  volume          = "19",
  number          = "5",
  pages           = "69:1--69:15",
  month           = aug,
  year            = "2020",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3390298",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Fri Aug 28 11:52:49 MDT 2020",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3390298",
  abstract        = "Crosslingual word embeddings developed from multiple
                     parallel corpora help in understanding the
                     relationships between languages and improving the
                     prediction quality of machine translation. However, in
                     low resource languages with complex and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "69",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}
%%% "Baidu Maps" is a product name: both words are brace-protected so
%%% that bibliography styles which downcase titles do not print
%%% "Baidu maps".
@Article{Li:2020:PQA,
author = "Ying Li and Jizhou Huang and Miao Fan and Jinyi Lei
and Haifeng Wang and Enhong Chen",
title = "Personalized Query Auto-Completion for Large-Scale
{POI} Search at {Baidu Maps}",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "70:1--70:16",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3394137",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib;
https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3394137",
abstract = "Query auto-completion (QAC) is a featured function
that has been widely adopted by many sub-domains of
search. It can dramatically reduce the number of typed
characters and avoid spelling mistakes. These merits of
QAC are highlighted to improve user satisfaction,
especially when users intend to type in a query on
mobile devices. In this article, we will present our
industrial solution to the personalized QAC for the
point of interest (POI) search at Baidu Maps, a
well-known Web mapping service on mobiles in China. The
industrial solution makes a good tradeoff between the
offline effectiveness of a novel neural learning model
that we devised for feature generation and the online
efficiency of an off-the-shelf learning to rank (LTR)
approach for the real-time suggestion. Besides some
practical lessons from how a real-world QAC system is
built and deployed in Baidu Maps to facilitate a large
number of users in searching tens of millions of POIs,
we mainly explore two specific features for the
personalized QAC function of the POI search engine: the
spatial-temporal characteristics of POIs and the
historically queried POIs of individual users.\par
We leverage the large-volume POI search logs in Baidu
Maps to conduct offline evaluations of our personalized
QAC model measured by multiple metrics, including Mean
Reciprocal Rank (MRR), Success Rate (SR), and
normalized Discounted Cumulative Gain (nDCG). Extensive
experimental results demonstrate that the personalized
model enhanced by the proposed features can achieve
substantial improvements (i.e., +3.29\% MRR, +3.78\%
SR@1, +5.17\% SR@3, +1.96\% SR@5, and +3.62\% nDCG@5).
After deploying this upgraded model into the POI search
engine at Baidu Maps for A/B testing online, we observe
that some other critical indicators, such as the
average number of keystrokes and the average typing
speed at keystrokes in a QAC session, which are also
related to user satisfaction, decrease as well by
1.37\% and 1.69\%, respectively. So the conclusion is
that the two kinds of features contributed by us are
quite helpful in personalized mapping services for
industrial practice.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "70",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
%%% NOTE(review): the second author's family name appears to be the
%%% compound "Abdel Monem"; it is braced so BibTeX keeps it together as
%%% one surname rather than reading "Abdel" as a given name.  Confirm
%%% against the published author list.
@Article{Alkhatib:2020:DLA,
author = "Manar Alkhatib and Azza {Abdel Monem} and Khaled
Shaalan",
title = "Deep Learning for {Arabic} Error Detection and
Correction",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "71:1--71:13",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3373266",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib;
https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3373266",
abstract = "Research on tools for automating the proofreading of
Arabic text has received much attention in recent
years. There is an increasing demand for applications
that can detect and correct Arabic spelling and
grammatical errors to improve the quality of Arabic
text content and application input. Our review of
previous studies indicates that few Arabic
spell-checking research efforts appropriately address
the detection and correction of ill-formed words that
do not conform to the Arabic morphology system. Even
fewer systems address the detection and correction of
erroneous well-formed Arabic words that are either
contextually or semantically inconsistent within the
text. We introduce an approach that investigates
employing deep neural network technology for error
detection in Arabic text. We have developed a
systematic framework for spelling and grammar error
detection, as well as correction at the word level,
based on a bidirectional long short-term memory
mechanism and word embedding, in which a polynomial
network classifier is at the top of the system. To get
conclusive results, we have developed the most
significant gold standard annotated corpus to date,
containing 15 million fully inflected Arabic words. The
data were collected from diverse text sources and
genres, in which every erroneous and ill-formed word
has been annotated, validated, and manually revised by
Arabic specialists. This valuable asset is available
for the Arabic natural language processing research
community. The experimental results confirm that our
proposed system significantly outperforms the
performance of Microsoft Word 2013 and Open Office
Ayaspell 3.4, which have been used in the literature
for evaluating similar research.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "71",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Orhan:2020:LWV,
author = "Umut Orhan and Enis Arslan",
title = "Learning Word-vector Quantization: a Case Study in
Morphological Disambiguation",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "72:1--72:18",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3397967",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3397967",
abstract = "We introduced a new classifier named Learning
Word-vector Quantization (LWQ) to solve morphological
ambiguities in Turkish, which is an agglutinative
language. First, a new and morphologically annotated
corpus, and then its datasets are prepared with a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "72",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Dhar:2020:CSC,
author = "Ankita Dhar and Himadri Mukherjee and Niladri Sekhar
Dash and Kaushik Roy",
title = "{CESS} --- a System to Categorize {Bangla} {Web} Text
Documents",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "73:1--73:18",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3398070",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3398070",
abstract = "Technology has evolved remarkably, which has led to an
exponential increase in the availability of digital
text documents of disparate domains over the Internet.
This makes the retrieval of the information a very much
time- and resource-consuming task. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "73",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bai:2020:NCT,
author = "Ruirui Bai and Zhongqing Wang and Fang Kong and
Shoushan Li and Guodong Zhou",
title = "Neural Co-training for Sentiment Classification with
Product Attributes",
journal = j-TALLIP,
volume = "19",
number = "5",
pages = "74:1--74:17",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3394113",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Fri Aug 28 11:52:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3394113",
abstract = "Sentiment classification aims to detect polarity from
a piece of text. The polarity is usually positive or
negative, and the text genre is usually product review.
The challenges of sentiment classification are that it
is hard to capture semantic of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "74",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Schmidt:2020:GTC,
author = "Dirk Schmidt",
title = "Grading {Tibetan} Children's Literature: a Test Case
Using the {NLP} Readability Tool {``Dakje''}",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "75:1--75:19",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3392046",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3392046",
abstract = "Worldwide, literacy is on the rise. This historically
unprecedented surge---especially over the past 200
years---has changed nearly everything about the ancient
technology of reading. Who reads is changing: Literacy
is no longer just for elite, professional \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "75",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Habiba:2020:TCN,
author = "Rabia Habiba and Muhammad Awais and Muhammad
Shoaib",
title = "A Technique to Calculate National Happiness Index by
Analyzing {Roman Urdu} Messages Posted on Social
Media",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "76:1--76:16",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3400712",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3400712",
abstract = "National Happiness Index (NHI) is a national indicator
of development that estimates the economic and social
well-being of the nation's individuals. With the
proliferation of the internet, people share a
significant amount of data on social media \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "76",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2020:EFL,
author = "Hao Wang and Qiongxing Tao and Siyuan Du and Xiangfeng
Luo",
title = "An Extensible Framework of Leveraging Syntactic
Skeleton for Semantic Relation Classification",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "77:1--77:21",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3402885",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3402885",
abstract = "Relation classification is one of the most fundamental
upstream tasks in natural language processing and
information extraction. State-of-the-art approaches
make use of various deep neural networks (DNNs) to
extract higher-level features directly. They \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "77",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Malhas:2020:ABR,
author = "Rana Malhas and Tamer Elsayed",
title = "{AyaTEC}: Building a Reusable Verse-Based Test
Collection for {Arabic} Question Answering on the {Holy
Qur'an}",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "78:1--78:21",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3400396",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3400396",
abstract = "The absence of publicly available reusable test
collections for Arabic question answering on the Holy
Qur'an has impeded the possibility of fairly comparing
the performance of systems in that domain. In this
article, we introduce AyaTEC, a reusable test
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "78",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ruan:2020:CTV,
author = "Yu-Ping Ruan and Zhen-Hua Ling and Xiaodan Zhu",
title = "Condition-Transforming Variational Autoencoder for
Generating Diverse Short Text Conversations",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "79:1--79:13",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3402884",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3402884",
abstract = "In this article, conditional-transforming variational
autoencoders (CTVAEs) are proposed for generating
diverse short text conversations. In conditional
variational autoencoders (CVAEs), the prior
distribution of latent variable z follows a
multivariate \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "79",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Badaro:2020:LPA,
author = "Gilbert Badaro and Hazem Hajj and Nizar Habash",
title = "A Link Prediction Approach for Accurately Mapping a
Large-scale {Arabic} Lexical Resource to {English}
{WordNet}",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "80:1--80:38",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3404854",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3404854",
abstract = "Success of Natural Language Processing (NLP) models,
just like all advanced machine learning models, rely
heavily on large-scale lexical resources. For English,
English WordNet (EWN) is a leading example of a
large-scale resource that has enabled \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "80",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ma:2020:IWS,
author = "Tinghuai Ma and Raeed Al-Sabri and Lejun Zhang and
Bockarie Marah and Najla Al-Nabhan",
title = "The Impact of Weighting Schemes and Stemming Process
on Topic Modeling of {Arabic} Long and Short Texts",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "81:1--81:23",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3405843",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3405843",
abstract = "In this article, first a comprehensive study of the
impact of term weighting schemes on the topic modeling
performance (i.e., LDA and DMM) on Arabic long and
short texts is presented. We investigate six term
weighting methods including Word count method
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "81",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{M:2020:CAH,
author = "M. Poornima Devi and M. Sornam",
title = "Classification of Ancient Handwritten {Tamil}
Characters on Palm Leaf Inscription Using Modified
Adaptive Backpropagation Neural Network with {GLCM}
Features",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "82:1--82:24",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3406209",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3406209",
abstract = "The core aspiration of this proposed work is to
classify Tamil characters inscribed in the palm leaf
manuscript using an Artificial Neural Network. Tamil
palm leaf manuscript characters in the form of images
were processed and segmented using contour-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "82",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2020:AMU,
author = "Qimeng Yang and Long Yu and Shengwei Tian and Jinmiao
Song",
title = "Attention Mechanism for {Uyghur} Personal Pronouns
Resolution",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "83:1--83:13",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3412323",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3412323",
abstract = "Deep neural network models for Uyghur personal pronoun
resolution learn semantic information for personal
pronoun and antecedents, but tend to be
short-sighted---they ignore the importance of each
feature. In this article, we propose a Uyghur personal
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "83",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Xi:2020:GEL,
author = "Xuefeng Xi and Zhou Pi and Guodong Zhou",
title = "Global Encoding for Long {Chinese} Text
Summarization",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "84:1--84:17",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3407911",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3407911",
abstract = "Text summarization is one of the significant tasks of
natural language processing, which automatically
converts text into a summary. Some summarization
systems, for short/long English, and short Chinese
text, benefit from advances in the neural
encoder-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "84",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tanwar:2020:TMR,
author = "Ashwani Tanwar and Prasenjit Majumder",
title = "Translating Morphologically Rich {Indian} Languages
under Zero-Resource Conditions",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "85:1--85:15",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3407912",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3407912",
abstract = "This work presents an in-depth analysis of machine
translations of morphologically-rich Indo-Aryan and
Dravidian languages under zero-resource conditions. It
focuses on Zero-Shot Systems for these languages and
leverages transfer-learning by exploiting \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "85",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Laatar:2020:DAW,
author = "Rim Laatar and Chafik Aloulou and Lamia Hadrich
Belguith",
title = "Disambiguating {Arabic} Words According to Their
Historical Appearance in the Document Based on
Recurrent Neural Networks",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "86:1--86:16",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3410569",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3410569",
abstract = "How can we determine the semantic meaning of a word in
relation to its context of appearance? We eventually
have to grapple with this difficult question, as one of
the paramount problems of Natural Language Processing
(NLP). In other words, this issue \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "86",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Chou:2020:CWN,
author = "Chien-Lung Chou and Chia-Hui Chang and Yuan-Hao Lin
and Kuo-Chun Chien",
title = "On the Construction of {Web} {NER} Model Training Tool
based on Distant Supervision",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "87:1--87:28",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3422817",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3422817",
abstract = "Named entity recognition (NER) is an important task in
natural language understanding, as it extracts the key
entities (person, organization, location, date, number,
etc.) and objects (product, song, movie, activity name,
etc.) mentioned in texts. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "87",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wu:2020:DEW,
author = "Chuhan Wu and Fangzhao Wu and Tao Qi and Junxin Liu
and Yongfeng Huang and Xing Xie",
title = "Detecting Entities of Works for {Chinese} Chatbot",
journal = j-TALLIP,
volume = "19",
number = "6",
pages = "88:1--88:13",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3414901",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sun Mar 28 08:15:55 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3414901",
abstract = "Chatbots such as Xiaoice have gained huge popularity
in recent years. Users frequently mention their
favorite works such as songs and movies in
conversations with chatbots. Detecting these entities
can help design better chat strategies and improve user
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "88",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Manogaran:2021:SID,
author = "Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin
Xin",
title = "Special Issue on Deep Structured Learning for Natural
Language Processing",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "1:1--1:2",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436206",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3436206",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2021:CDB,
author = "Kun Wang and Yanpeng Cui and Jianwei Hu and Yu Zhang
and Wei Zhao and Luming Feng",
title = "Cyberbullying Detection, Based on the {FastText} and
Word Similarity Schemes",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "2:1--2:15",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3398191",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3398191",
abstract = "With recent developments in online social networks
(OSNs), these services are widely applied in daily
lives. On the other hand, cyberbullying, which is a
relatively new type of harassment through the
internet-based electronic devices, is rising in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "2",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sun:2021:DIM,
author = "Chengai Sun and Liangyu Lv and Gang Tian and Tailu
Liu",
title = "Deep Interactive Memory Network for Aspect-Level
Sentiment Analysis",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "3:1--3:12",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3402886",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3402886",
abstract = "The goal of aspect-level sentiment analysis is to
identify the sentiment polarity of a specific opinion
target expressed; it is a fine-grained sentiment
analysis task. Most of the existing works study how to
better use the target information to model \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "3",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2021:VTM,
author = "Wei Wang and Zhiguo Gong and Jing Ren and Feng Xia and
Zhihan Lv and Wei Wei",
title = "Venue Topic Model-enhanced Joint Graph Modelling for
Citation Recommendation in Scholarly Big Data",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "4:1--4:15",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3404995",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3404995",
abstract = "Natural language processing technologies, such as
topic models, have been proven to be effective for
scholarly recommendation tasks with the ability to deal
with content information. Recently, venue
recommendation is becoming an increasingly important
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "4",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Peng:2021:THS,
author = "Lingxi Peng and Haohuai Liu and Yangang Nie and Ying
Xie and Xuan Tang and Ping Luo",
title = "The Transnational Happiness Study with Big Data
Technology",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "5:1--5:12",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3412497",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3412497",
abstract = "Happiness is a hot topic in academic circles. The
study of happiness involves many disciplines, such as
philosophy, psychology, sociology, and economics.
However, there are few studies on the quantitative
analysis of the factors affecting happiness. In
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "5",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Guangce:2021:KDN,
author = "Ruan Guangce and Xia Lei",
title = "Knowledge Discovery of News Text Based on Artificial
Intelligence",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "6:1--6:18",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418062",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3418062",
abstract = "The explosion of news text and the development of
artificial intelligence provide a new opportunity and
challenge to provide high-quality media monitoring
service. In this article, we propose a semantic
analysis approach based on the Latent Dirichlet
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "6",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Meelen:2021:OLA,
author = "Marieke Meelen and {\'E}lie Roux and Nathan Hill",
title = "Optimisation of the Largest Annotated {Tibetan} Corpus
Combining Rule-based, Memory-based, and Deep-learning
Methods",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "7:1--7:11",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3409488",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3409488",
abstract = "This article presents a pipeline that converts
collections of Tibetan documents in plain text or XML
into a fully segmented and POS-tagged corpus. We apply
the pipeline to the large extant collection of the
Buddhist Digital Resource Center. The semisupervised
methods presented here not only result in a new and
improved version of the largest annotated Tibetan
corpus to date, the integration of rule-based,
memory-based, and neural-network methods also serves as
a good example of how to overcome challenges of
under-researched languages. The end-to-end accuracy of
our entire automatic pipeline of 91.99\% is high enough
to make the resulting corpus a useful resource for both
linguists and scholars of Tibetan studies.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "7",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kumar:2021:MNF,
author = "S. Rakesh Kumar and S. Muthuramalingam and Fadi
Al-Turjman",
title = "Multimodal News Feed Evaluation System with Deep
Reinforcement Learning Approaches",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "8:1--8:12",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3414523",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3414523",
abstract = "Multilingual and multimodal data analysis is the
emerging news feed evaluation system. News feed
analysis and evaluations are interrelated processes,
which are useful in understanding the news factors. The
news feed evaluation system can be implemented
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "8",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Khan:2021:HSD,
author = "Muhammad Moin Khan and Khurram Shahzad and Muhammad
Kamran Malik",
title = "Hate Speech Detection in {Roman Urdu}",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "9:1--9:19",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3414524",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3414524",
abstract = "Hate speech is a specific type of controversial
content that is widely legislated as a crime that must
be identified and blocked. However, due to the sheer
volume and velocity of the Twitter data stream, hate
speech detection cannot be performed manually. To
address this issue, several studies have been conducted
for hate speech detection in European languages,
whereas little attention has been paid to low-resource
South Asian languages, making the social media
vulnerable for millions of users. In particular, to the
best of our knowledge, no study has been conducted for
hate speech detection in Roman Urdu text, which is
widely used in the sub-continent. In this study, we
have scraped more than 90,000 tweets and manually
parsed them to identify 5,000 Roman Urdu tweets.
Subsequently, we have employed an iterative approach to
develop guidelines and used them for generating the
Hate Speech Roman Urdu 2020 corpus. The tweets in
this corpus are classified at three levels:
Neutral--Hostile, Simple--Complex, and Offensive--Hate
speech. As another contribution, we have used five
supervised learning techniques, including a deep
learning technique, to evaluate and compare their
effectiveness for hate speech detection. The results
show that Logistic Regression outperformed all other
techniques, including deep learning techniques for the
two levels of classification, by achieving an F1 score
of 0.906 for distinguishing between Neutral--Hostile
tweets, and 0.756 for distinguishing between
Offensive--Hate speech tweets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "9",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sun:2021:UNM,
author = "Haipeng Sun and Rui Wang and Masao Utiyama and
Benjamin Marie and Kehai Chen and Eiichiro Sumita and
Tiejun Zhao",
title = "Unsupervised Neural Machine Translation for Similar
and Distant Language Pairs: an Empirical Study",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "10:1--10:17",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418059",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3418059",
abstract = "Unsupervised neural machine translation (UNMT) has
achieved remarkable results for several language pairs,
such as French-English and German-English. Most
previous studies have focused on modeling UNMT systems;
few studies have investigated the effect \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "10",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhang:2021:HBN,
author = "Peiying Zhang and Xingzhe Huang and Maozhen Li and Yu
Xue",
title = "Hybridization between Neural Computing and
Nature-Inspired Algorithms for a Sentence Similarity
Model Based on the Attention Mechanism",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "11:1--11:21",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447756",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447756",
abstract = "Sentence similarity analysis has been applied in many
fields, such as machine translation, the question
answering system, and voice customer service. As a
basic task of natural language processing, sentence
similarity analysis plays an important role in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "11",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Husain:2021:SOL,
author = "Fatemah Husain and Ozlem Uzuner",
title = "A Survey of Offensive Language Detection for the
{Arabic} Language",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "12:1--12:44",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3421504",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3421504",
abstract = "The use of offensive language in user-generated
content is a serious problem that needs to be addressed
with the latest technology. The field of Natural
Language Processing (NLP) can support the automatic
detection of offensive language. In this survey,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "12",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Alzubaidi:2021:RTA,
author = "Mohammad A. Alzubaidi and Mwaffaq Otoom and Nouran S.
Ahmad",
title = "Real-time Assistive Reader Pen for {Arabic} Language",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "13:1--13:30",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3423133",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3423133",
abstract = "Disability is an impairment affecting an individual's
livelihood and independence. Assistive technology
enables the disabled cohort of the community to break
the barriers to learning, access information,
contribute to the community, and live \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "13",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sidig:2021:KAS,
author = "Ala Addin I. Sidig and Hamzah Luqman and Sabri Mahmoud
and Mohamed Mohandes",
title = "{KArSL}: {Arabic} Sign Language Database",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "14:1--14:19",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3423420",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3423420",
abstract = "Sign language is the major means of communication for
the deaf community. It uses body language and gestures
such as hand shapes, lip patterns, and facial
expressions to convey a message. Sign language is
geography-specific, as it differs from one \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "14",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wijayanti:2021:AIS,
author = "Rini Wijayanti and Andria Arisal",
title = "Automatic {Indonesian} Sentiment Lexicon Curation with
Sentiment Valence Tuning for Social Media Sentiment
Analysis",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "15:1--15:16",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3425632",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3425632",
abstract = "A novel Indonesian sentiment lexicon (SentIL ---
Sentiment Indonesian Lexicon) is created with an
automatic pipeline; from creating sentiment seed words,
adding new words with slang words, emoticons, and from
the given dictionary and sentiment corpus, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "15",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2021:TTS,
author = "Zhongyang Li and Xiao Ding and Ting Liu",
title = "{TransBERT}: a Three-Stage Pre-training Technology for
Story-Ending Prediction",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "16:1--16:20",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3427669",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3427669",
abstract = "Recent advances, such as GPT, BERT, and RoBERTa, have
shown success in incorporating a pre-trained
transformer language model and fine-tuning operations
to improve downstream NLP systems. However, this
framework still has some fundamental problems in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "16",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bolucu:2021:CUM,
author = "Necva B{\"o}l{\"u}c{\"u} and Burcu Can",
title = "A Cascaded Unsupervised Model for {PoS} Tagging",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "17:1--17:23",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447759",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447759",
abstract = "Part of speech (PoS) tagging is one of the fundamental
syntactic tasks in Natural Language Processing, as it
assigns a syntactic category to each word within a
given sentence or context (such as noun, verb,
adjective, etc.). Those syntactic categories \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Chauhan:2021:ISC,
author = "Uttam Chauhan and Apurva Shah",
title = "Improving Semantic Coherence of {Gujarati} Text Topic
Model Using Inflectional Forms Reduction and
Single-letter Words Removal",
journal = j-TALLIP,
volume = "20",
number = "1",
pages = "18:1--18:18",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447760",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu Apr 15 14:24:01 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447760",
abstract = "A topic model is one of the best stochastic models for
summarizing an extensive collection of text. It has
accomplished an inordinate achievement in text analysis
as well as text summarization. It can be employed to
the set of documents that are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "18",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Congjun:2021:RDT,
author = "Congjun Long and Nathan W. Hill",
title = "Recent Developments in {Tibetan NLP}",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "19:1--19:3",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453692",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3453692",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "19",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{List:2021:TSH,
author = "Johann-Mattis List and Nathaniel A. Sims and Robert
Forkel",
title = "Toward a Sustainable Handling of Interlinear-Glossed
Text in Language Documentation",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "20:1--20:15",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3389010",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3389010",
abstract = "While the amount of digitally available data on the
world's languages is steadily increasing, with more and
more languages being documented, only a small
proportion of the language resources produced are
sustainable. Data reuse is often difficult due to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "20",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Krishna:2021:ATA,
author = "Ravi Krishna and Norman Mu and Kurt Keutzer",
title = "Applying Text Analytics to the Mind-section Literature
of the {Tibetan} Tradition of the {Great Perfection}",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "21:1--21:32",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3392047",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3392047",
abstract = "Over the past decade, through a mixture of optical
character recognition and manual input, there is now a
growing corpus of Tibetan literature available as
e-texts in Unicode format. With the creation of such a
corpus, the techniques of text analytics \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "21",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Trinley:2021:TWE,
author = "Ngawang Trinley and Tenzin and Dirk Schmidt and Helios
Hildt and Tenzin Kaldan",
title = "Taming the Wild Etext: Managing, Annotating, and
Sharing {Tibetan} Corpora in Open Spaces",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "22:1--22:23",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418060",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3418060",
abstract = "Digital text is quickly becoming essential to modern
daily life. The article you are reading right now is
born digital; unlike texts of the not-so-distant past,
it may never be printed at all. Worldwide, the trend is
clear: Digital text is on the way in, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "22",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kulkarni:2021:SPF,
author = "Amba Kulkarni",
title = "{Sanskrit} Parsing Following {Indian} Theories of
Verbal Cognition",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "23:1--23:38",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418061",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3418061",
abstract = "P{\=a}{\d{n}}ini's grammar is an important milestone
in the Indian grammatical tradition. Unlike grammars of
other languages, it is almost exhaustive and together
with the theories of {\'s}{\=a}bdabodha (verbal cognition),
this grammar provides a system for language \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "23",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2021:FBS,
author = "Yachao Li and Jing Jiang and Jia Yangji and Ning Ma",
title = "Finding Better Subwords for {Tibetan} Neural Machine
Translation",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "24:1--24:11",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448216",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3448216",
abstract = "Subword segmentation plays an important role in
Tibetan neural machine translation (NMT). The structure
of Tibetan words consists of two levels. First, words
consist of a sequence of syllables, and then a syllable
consists of a sequence of characters. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "24",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Long:2021:RTM,
author = "Congjun Long and Xuewen Zhou and Maoke Zhou",
title = "Recognition of {Tibetan} Maximal-length Noun Phrases
Based on Syntax Tree",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "25:1--25:13",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3423324",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3423324",
abstract = "Frequently corresponding to syntactic components, the
Maximal-length Noun Phrase (MNP) possesses abundant
syntactic and semantic information and acts a certain
semantic role in sentences. Recognition of MNP plays an
important role in Natural Language \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "25",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Shi:2021:MLC,
author = "Shumin Shi and Dan Luo and Xing Wu and Congjun Long
and Heyan Huang",
title = "Multi-level Chunk-based Constituent-to-Dependency
{Treebank} Transformation for {Tibetan} Dependency
Parsing",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "26:1--26:12",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3424247",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3424247",
abstract = "Dependency parsing is an important task for Natural
Language Processing (NLP). However, a mature parser
requires a large treebank for training, which is still
extremely costly to create. Tibetan is a kind of
extremely low-resource language for NLP, there
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "26",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sun:2021:JMR,
author = "Yuan Sun and Andong Chen and Chaofan Chen and Tianci
Xia and Xiaobing Zhao",
title = "A Joint Model for Representation Learning of {Tibetan}
Knowledge Graph Based on Encyclopedia",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "27:1--27:17",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447248",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447248",
abstract = "Learning the representation of a knowledge graph is
critical to the field of natural language processing.
There is a lot of research for English knowledge graph
representation. However, for the low-resource
languages, such as Tibetan, how to represent \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "27",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2021:CSE,
author = "Hao Wang and Bin Wang and Jianyong Duan and Jiajun
Zhang",
title = "{Chinese} Spelling Error Detection Using a Fusion
Lattice {LSTM}",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "28:1--28:11",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3426882",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3426882",
abstract = "Spelling error detection serves as a crucial
preprocessing in many natural language processing
applications. Unlike English, where every single word
is directly typed by keyboard, we have to use an input
method to input Chinese characters. The pinyin
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "28",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Nasution:2021:POB,
author = "Arbi Haza Nasution and Yohei Murakami and Toru
Ishida",
title = "Plan Optimization to Bilingual Dictionary Induction
for Low-resource Language Families",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "29:1--29:28",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448215",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3448215",
abstract = "Creating bilingual dictionary is the first crucial
step in enriching low-resource languages. Especially
for the closely related ones, it has been shown that
the constraint-based approach is useful for inducing
bilingual lexicons from two bilingual \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "29",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{An:2021:NDP,
author = "Bo An and Congjun Long",
title = "Neural Dependency Parser for {Tibetan} Sentences",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "30:1--30:16",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3429456",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3429456",
abstract = "The research of Tibetan dependency analysis is mainly
limited to two challenges: lack of a dataset and
reliance on expert knowledge. To resolve the preceding
challenges, we first introduce a new Tibetan dependency
analysis dataset, and then propose a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "30",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhang:2021:USC,
author = "Longtu Zhang and Mamoru Komachi",
title = "Using Sub-character Level Information for Neural
Machine Translation of Logographic Languages",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "31:1--31:15",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431727",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3431727",
abstract = "Logographic and alphabetic languages (e.g., Chinese
vs. English) have different writing systems
linguistically. Languages belonging to the same writing
system usually exhibit more sharing information, which
can be used to facilitate natural language \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "31",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mishra:2021:HIC,
author = "Santosh Kumar Mishra and Rijul Dhir and Sriparna Saha
and Pushpak Bhattacharyya",
title = "A {Hindi} Image Caption Generation Framework Using
Deep Learning",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "32:1--32:19",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3432246",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3432246",
abstract = "Image captioning is the process of generating a
textual description of an image that aims to describe
the salient parts of the given image. It is an
important problem, as it involves computer vision and
natural language processing, where computer vision
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "32",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Darwish:2021:ADR,
author = "Kareem Darwish and Ahmed Abdelali and Hamdy Mubarak
and Mohamed Eldesouki",
title = "{Arabic} Diacritic Recovery Using a Feature-rich
{biLSTM} Model",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "33:1--33:18",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434235",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3434235",
abstract = "Diacritics (short vowels) are typically omitted when
writing Arabic text, and readers have to reintroduce
them to correctly pronounce words. There are two types
of Arabic diacritics: The first are core-word
diacritics (CW), which specify the lexical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "33",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Binbeshr:2021:SRH,
author = "Farid Binbeshr and Amirrudin Kamsin and Manal
Mohammed",
title = "A Systematic Review on Hadith Authentication and
Classification Methods",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "34:1--34:17",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434236",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3434236",
abstract = "Background: A hadith refers to sayings, actions, and
characteristics of the Prophet Muhammad peace be upon
him. The authenticity of hadiths is crucial, because
they constitute the source of legislation for Muslims
with the Holy Quran. Classifying hadiths \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "34",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Wang:2021:HMN,
author = "Yu Wang and Yining Sun and Zuchang Ma and Lisheng Gao
and Yang Xu",
title = "A Hybrid Model for Named Entity Recognition on
{Chinese} Electronic Medical Records",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "35:1--35:12",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436819",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3436819",
abstract = "Electronic medical records (EMRs) contain valuable
information about the patients, such as clinical
symptoms, diagnostic results, and medications. Named
entity recognition (NER) aims to recognize entities
from unstructured text, which is the initial step
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "35",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jin:2021:HSS,
author = "Guozhe Jin and Zhezhou Yu",
title = "A Hierarchical Sequence-to-Sequence Model for {Korean}
{POS} Tagging",
journal = j-TALLIP,
volume = "20",
number = "2",
pages = "36:1--36:13",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3421762",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Thu May 6 07:32:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3421762",
abstract = "Part-of-speech (POS) tagging is a fundamental task in
natural language processing. Korean POS tagging
consists of two subtasks: morphological analysis and
POS tagging. In recent years, scholars have tended to
use the seq2seq model to solve this problem. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "36",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{S:2021:SAA,
author = "Dhivya S. and Usha Devi G.",
title = "Study on Automated Approach to Recognize Characters
for Handwritten and Historical Document",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "37:1--37:24",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3396167",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3396167",
abstract = "Script recognition is the mechanism of automatic
script analysis and recognition whereby intensive study
has been carried out and a significant amount of papers
on this problem have been released over the past. But
there are still a few issues to be \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "37",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2021:DDH,
author = "Xiaodong Yang and Xiaoxia Lin",
title = "Design and Development of Heuristic Utility Management
Algorithm for {Chinese} Library Management System",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "38:1--38:13",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3397968",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3397968",
abstract = "Utility Management in a library is the programmatic
tool with the synthetic mental program ability, along
with Artificial Intelligence capacities, headed to
manage a high volume of books, articles, and
assignments, which help to ease the manual \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "38",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{S:2021:THT,
author = "Dhivya S. and Usha Devi G.",
title = "{TAMIZHI}: Historical {Tamil-Brahmi} Script Recognition
Using {CNN} and {MobileNet}",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "39:1--39:26",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3402891",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3402891",
abstract = "Computational epigraphy is the study of an ancient
script where the computer science and mathematical
model is relatively built for epigraphy. The
Tamil-Brahmi inscriptions are the most ancient of the
extant written of the Tamil. The inscriptions furnish
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "39",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jiang:2021:TLB,
author = "Peipei Jiang and Liailun Chen and Min-Feng Wang",
title = "Transfer Learning Based Recurrent Neural Network
Algorithm for Linguistic Analysis",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "40:1--40:16",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3406204",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3406204",
abstract = "Each language is a system of understanding and skills
that allows language users to interact, express
thoughts, hypotheses, feelings, wishes, and all that
needs to be expressed. Linguistics is the research of
these structures in all respects: the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "40",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Fang:2021:HBG,
author = "Hui Fang and Hongmei Shi and Jiuzhou Zhang",
title = "Heuristic Bilingual Graph Corpus Network to Improve
{English} Instruction Methodology Based on Statistical
Translation Approach",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "41:1--41:14",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3406205",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3406205",
abstract = "The number of sentence pairs in the bilingual corpus
is a key to translation accuracy in computational
machine translations. However, if the amount goes
beyond a certain degree, the increasing number of cases
has less impact on the translation while the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "41",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(3), article 42.
% NOTE(review): "Chen Xianqiao" in the publisher byline appears to be in
% family-name-first order, so it is entered in "Last, First" form to make
% BibTeX treat "Chen" as the surname; confirm against the article page.
@Article{Jamal:2021:DLB,
author = "Nasir Jamal and Chen, Xianqiao and Fadi Al-Turjman and
Farhan Ullah",
title = "A Deep Learning-based Approach for Emotions
Classification in Big Corpus of Imbalanced Tweets",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "42:1--42:16",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3410570",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3410570",
abstract = "Emotions detection in natural languages is very
effective in analyzing the user's mood about a
concerned product, news, topic, and so on. However, it
is really a challenging task to extract important
features from a burst of raw social text, as emotions
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "42",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(3), article 45.
% Two authors use the "Last, First" form: the trailing "Cb" in the byline is a
% pair of initials rather than a surname, and "Gonzalez Crespo" is a two-word
% surname that the plain "First Last" form would split.
% NOTE(review): confirm both names against the article page.
@Article{Muthu:2021:FET,
author = "Balaanand Muthu and Sivaparthipan, C. B. and Priyan
Malarvizhi Kumar and Seifedine Nimer Kadry and
Ching-Hsien Hsu and Oscar Sanjuan and Gonzalez Crespo,
Ruben",
title = "A Framework for Extractive Text Summarization Based on
Deep Learning Modified Neural Network Classifier",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "45:1--45:20",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3392048",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3392048",
abstract = "There is an exponential growth of text data over the
internet, and it is expected to gain significant growth
and attention in the coming years. Extracting
meaningful insights from text data is crucially
important as it offers value-added solutions to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "45",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(3), article 46 -- Wang, Sun, Li.
@article{Wang:2021:AEA,
  author          = {Ailing Wang and Jie Sun and Leiming Li},
  title           = {An Analysis for Elements of Affecting the
                     Establishment and Promotion of Micro-business Trust in
                     {C2C} Model under {WeChat} Circumstance},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {46:1--46:11},
  articleno       = {46},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3398011},
  url             = {https://dl.acm.org/doi/10.1145/3398011},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The core of micro-business and consumer transactions
                     is trust. Based on the Theory of Reasoned Action and
                     Technology Acceptance Model, this article discusses the
                     factors of the establishment and promotion of
                     micro-business trust from the trust orientation
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(3), article 48 -- Wang, Kumar, Samuel.
@article{Wang:2021:SGD,
  author          = {Erlu Wang and Priyan Malarvizhi Kumar and R. Dinesh
                     Jackson Samuel},
  title           = {Semantic Graphical Dependence Parsing Model in
                     Improving {English} Teaching Abilities},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {48:1--48:14},
  articleno       = {48},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3425633},
  url             = {https://dl.acm.org/doi/10.1145/3425633},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {It is a very difficult problem to achieve high-order
                     functionality for graphical dependency parsing without
                     growing decoding difficulties. To solve this problem,
                     this article offers a way for Semantic Graphical
                     Dependence Parsing Model (SGDPM) with a \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(3), article 49.
% The first two authors are bylined with trailing initials ("Ashokkumar P.",
% "Siva Shankar G."); the "Last, First" form keeps BibTeX from taking the
% initial as the surname. The citation key is left unchanged so existing
% \cite commands keep working.
% NOTE(review): confirm the name split against the article page.
@Article{P:2021:TST,
author = "Ashokkumar, P. and Siva Shankar, G. and Gautam
Srivastava and Praveen Kumar Reddy Maddikunta and
Thippa Reddy Gadekallu",
title = "A Two-stage Text Feature Selection Algorithm for
Improving Text Classification",
journal = j-TALLIP,
volume = "20",
number = "3",
pages = "49:1--49:19",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3425781",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:09 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3425781",
abstract = "As the number of digital text documents increases on a
daily basis, the classification of text is becoming a
challenging task. Each text document consists of a
large number of words (or features) that drive down the
efficiency of a classification \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "49",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(3), article 50 -- Quamer et al.
@article{Quamer:2021:SSA,
  author          = {Waris Quamer and Praphula Kumar Jain and Arpit Rai and
                     Vijayalakshmi Saravanan and Rajendra Pamula and
                     Chiranjeev Kumar},
  title           = {{SACNN}: Self-attentive Convolutional Neural Network
                     Model for Natural Language Inference},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {50:1--50:16},
  articleno       = {50},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3426884},
  url             = {https://dl.acm.org/doi/10.1145/3426884},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Inference has been central problem for understanding
                     and reasoning in artificial intelligence. Especially,
                     Natural Language Inference is an interesting problem
                     that has attracted the attention of many researchers.
                     Natural language inference intends to \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(3), article 51 -- Liao, Huang, Liu.
@article{Liao:2021:ENO,
  author          = {Hsiu-Li Liao and Zhen-Yu Huang and Su-Houn Liu},
  title           = {The Effects of Negative Online Reviews on Consumer
                     Perception, Attitude and Purchase Intention:
                     Experimental Investigation of the Amount, Quality, and
                     Presentation Order of {eWOM}},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {51:1--51:21},
  articleno       = {51},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3426883},
  url             = {https://dl.acm.org/doi/10.1145/3426883},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The quick growth and fast spread of electronic
                     word-of-mouth (eWOM) have created a new threat to
                     Internet merchants and marketers through paid online
                     reviewers flooding sites with product and service
                     reviews that could confuse and deter customers. This
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(3), article 52 -- Seifollahi, Piccardi, Jolfaei.
@article{Seifollahi:2021:EBT,
  author          = {Sattar Seifollahi and Massimo Piccardi and Alireza
                     Jolfaei},
  title           = {An Embedding-Based Topic Model for Document
                     Classification},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {52:1--52:13},
  articleno       = {52},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3431728},
  url             = {https://dl.acm.org/doi/10.1145/3431728},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Topic modeling is an unsupervised learning task that
                     discovers the hidden topics in a collection of
                     documents. In turn, the discovered topics can be used
                     for summarizing, organizing, and understanding the
                     documents in the collection. Most of the existing
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(3), article 53 -- Li et al.
@article{Li:2021:DSL,
  author          = {Yong Li and Xiaojun Yang and Min Zuo and Qingyu Jin
                     and Haisheng Li and Qian Cao},
  title           = {Deep Structured Learning for Natural Language
                     Processing},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {3},
  pages           = {53:1--53:14},
  articleno       = {53},
  month           = may,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3433538},
  url             = {https://dl.acm.org/doi/10.1145/3433538},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:09 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The real-time and dissemination characteristics of
                     network information make net-mediated public opinion
                     become more and more important food safety early
                     warning resources, but the data of petabyte (PB) scale
                     growth also bring great difficulties to the \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 54 -- Mao et al.
@article{Mao:2021:NJM,
  author          = {Cunli Mao and Zhibo Man and Zhengtao Yu and Shengxiang
                     Gao and Zhenhan Wang and Hongbin Wang},
  title           = {A Neural Joint Model with {BERT} for {Burmese}
                     Syllable Segmentation, Word Segmentation, and {POS}
                     Tagging},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {54:1--54:23},
  articleno       = {54},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3436818},
  url             = {https://dl.acm.org/doi/10.1145/3436818},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The smallest semantic unit of the Burmese language is
                     called the syllable. In the present study, it is
                     intended to propose the first neural joint learning
                     model for Burmese syllable segmentation, word
                     segmentation, and part-of-speech (POS) tagging with
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 55.
% The first two authors are bylined with trailing initials ("Manjunath K. E.",
% "Srinivasa Raghavan K. M."); the "Last, First" form keeps BibTeX from taking
% the final initial as the surname. The citation key is left unchanged so
% existing \cite commands keep working.
% NOTE(review): confirm the name split against the article page.
@Article{E:2021:AMP,
author = "Manjunath, K. E. and Srinivasa Raghavan, K. M. and K.
Sreenivasa Rao and Dinesh Babu Jayagopi and V.
Ramasubramanian",
title = "Approaches for Multilingual Phone Recognition in
Code-switched and Non-code-switched Scenarios Using
{Indian} Languages",
journal = j-TALLIP,
volume = "20",
number = "4",
pages = "55:1--55:19",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3437256",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:10 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3437256",
abstract = "In this study, we evaluate and compare two different
approaches for multilingual phone recognition in
code-switched and non-code-switched scenarios. First
approach is a front-end Language Identification
(LID)-switched to a monolingual phone recognizer
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "55",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(4), article 56 -- Kumar, Jindal, Kumar.
@article{Kumar:2021:NAM,
  author          = {Mohinder Kumar and Manish Kumar Jindal and Munish
                     Kumar},
  title           = {A Novel Attack on Monochrome and Greyscale
                     {Devanagari} {CAPTCHAs}},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {56:1--56:30},
  articleno       = {56},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3439798},
  url             = {https://dl.acm.org/doi/10.1145/3439798},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The use of computer programs in breaching web site
                     security is common today. CAPTCHA (Completely Automated
                     Public Turing test to tell Computers and Humans Apart)
                     and human interaction proofs are the cost-effective
                     solution to these kinds of computer \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 57 -- Lin et al.
@article{Lin:2021:FIG,
  author          = {Nankai Lin and Boyu Chen and Xiaotian Lin and Kanoksak
                     Wattanachote and Shengyi Jiang},
  title           = {A Framework for {Indonesian} Grammar Error
                     Correction},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {57:1--57:12},
  articleno       = {57},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3440993},
  url             = {https://dl.acm.org/doi/10.1145/3440993},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Grammatical Error Correction (GEC) is a challenge in
                     Natural Language Processing research. Although many
                     researchers have been focusing on GEC in universal
                     languages such as English or Chinese, few studies focus
                     on Indonesian, which is a low-resource \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 58.
% NOTE(review): "Zhou Shijie" and "Liu Qihe" in the publisher byline appear to
% be in family-name-first order, so they are entered in "Last, First" form to
% make BibTeX treat "Zhou" and "Liu" as the surnames; confirm against the
% article page.
@Article{Shivachi:2021:LSU,
author = "Casper Shikali Shivachi and Refuoe Mokhosi and Zhou,
Shijie and Liu, Qihe",
title = "Learning Syllables Using {Conv-LSTM} Model for
{Swahili} Word Representation and Part-of-speech
Tagging",
journal = j-TALLIP,
volume = "20",
number = "4",
pages = "58:1--58:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3445975",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Sep 14 07:03:10 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3445975",
abstract = "The need to capture intra-word information in natural
language processing (NLP) tasks has inspired research
in learning various word representations at word,
character, or morpheme levels, but little attention has
been given to syllables from a syllabic \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "58",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP 20(4), article 59 -- Ranathunga, Liyanage.
@article{Ranathunga:2021:SAS,
  author          = {Surangika Ranathunga and Isuru Udara Liyanage},
  title           = {Sentiment Analysis of {Sinhala} News Comments},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {59:1--59:23},
  articleno       = {59},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3445035},
  url             = {https://dl.acm.org/doi/10.1145/3445035},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Sinhala is a low-resource language, for which basic
                     language and linguistic tools have not been properly
                     defined. This affects the development of NLP-based
                     end-user applications for Sinhala. Thus, when
                     implementing NLP tools such as sentiment analyzers,
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 60 -- Zhu et al.
@article{Zhu:2021:GBM,
  author          = {Junnan Zhu and Lu Xiang and Yu Zhou and Jiajun Zhang
                     and Chengqing Zong},
  title           = {Graph-based Multimodal Ranking Models for Multimodal
                     Summarization},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {60:1--60:21},
  articleno       = {60},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3445794},
  url             = {https://dl.acm.org/doi/10.1145/3445794},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Multimodal summarization aims to extract the most
                     important information from the multimedia input. It is
                     becoming increasingly popular due to the rapid growth
                     of multimedia data in recent years. There are various
                     researches focusing on different \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 61 -- Lalrempuii, Soni, Pakray.
@article{Lalrempuii:2021:IEM,
  author          = {Candy Lalrempuii and Badal Soni and Partha Pakray},
  title           = {An Improved {English-to-Mizo} Neural Machine
                     Translation},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {61:1--61:21},
  articleno       = {61},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3445974},
  url             = {https://dl.acm.org/doi/10.1145/3445974},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Machine Translation is an effort to bridge language
                     barriers and misinterpretations, making communication
                     more convenient through the automatic translation of
                     languages. The quality of translations produced by
                     corpus-based approaches predominantly depends
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 62 -- Thin et al.
@article{Thin:2021:TNL,
  author          = {Dang Van Thin and Ngan Luu-Thuy Nguyen and Tri Minh
                     Truong and Lac Si Le and Duy Tin Vo},
  title           = {Two New Large Corpora for {Vietnamese} Aspect-based
                     Sentiment Analysis at Sentence Level},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {62:1--62:22},
  articleno       = {62},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446678},
  url             = {https://dl.acm.org/doi/10.1145/3446678},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Aspect-based sentiment analysis has been studied in
                     both research and industrial communities over recent
                     years. For the low-resource languages, the standard
                     benchmark corpora play an important role in the
                     development of methods. In this article, we \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 63 -- Alian et al.
@article{Alian:2021:BAP,
  author          = {Marwah Alian and Arafat Awajan and Ahmad Al-Hasan and
                     Raeda Akuzhia},
  title           = {Building {Arabic} Paraphrasing Benchmark based on
                     Transformation Rules},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {63:1--63:17},
  articleno       = {63},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446770},
  url             = {https://dl.acm.org/doi/10.1145/3446770},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Measuring semantic similarity between short texts is
                     an important task in many applications of natural
                     language processing, such as paraphrasing
                     identification. This process requires a benchmark of
                     sentence pairs that are labeled by Arab linguists and
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 64 -- Prabhakar, Pal, Kumar.
@article{Prabhakar:2021:QET,
  author          = {Dinesh Kumar Prabhakar and Sukomal Pal and Chiranjeev
                     Kumar},
  title           = {Query Expansion for Transliterated Text Retrieval},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {64:1--64:34},
  articleno       = {64},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447649},
  url             = {https://dl.acm.org/doi/10.1145/3447649},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {With Web 2.0, there has been exponential growth in the
                     number of Web users and the volume of Web content. Most
                     of these users are not only consumers of the
                     information but also generators of it. People express
                     themselves here in colloquial languages, but \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 65 -- Taghizadeh, Faili.
@article{Taghizadeh:2021:CLA,
  author          = {Nasrin Taghizadeh and Heshaam Faili},
  title           = {Cross-lingual Adaptation Using Universal
                     Dependencies},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {65:1--65:23},
  articleno       = {65},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3448251},
  url             = {https://dl.acm.org/doi/10.1145/3448251},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {We describe a cross-lingual adaptation method based on
                     syntactic parse trees obtained from the Universal
                     Dependencies (UD), which are consistent across
                     languages, to develop classifiers in low-resource
                     languages. The idea of UD parsing is to capture
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 66 -- Coban, Inan, Ozel.
@article{Coban:2021:FTM,
  author          = {{\"O}nder {\c{C}}oban and Ali Inan and Selma Ayse
                     {\"O}zel},
  title           = {{Facebook} Tells Me Your Gender: an Exploratory Study
                     of Gender Prediction for {Turkish} {Facebook} Users},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {66:1--66:38},
  articleno       = {66},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3448253},
  url             = {https://dl.acm.org/doi/10.1145/3448253},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Online Social Networks (OSNs) are very popular
                     platforms for social interaction. Data posted publicly
                     over OSNs pose various threats against the individual
                     privacy of OSN users. Adversaries can try to predict
                     private attribute values, such as gender, as \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 67 -- Qi, Zheng, Shang.
@article{Qi:2021:DPB,
  author          = {Shanshan Qi and Limin Zheng and Feiyu Shang},
  title           = {Dependency Parsing-based Entity Relation Extraction
                     over {Chinese} Complex Text},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {67:1--67:34},
  articleno       = {67},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3450273},
  url             = {https://dl.acm.org/doi/10.1145/3450273},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Open Relation Extraction (ORE) plays a significant
                     role in the field of Information Extraction. It breaks
                     the limitation that traditional relation extraction
                     must pre-define relational types in the annotated
                     corpus and specific domains restrictions, to \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 68 -- Mousavi, Faili.
@article{Mousavi:2021:DPW,
  author          = {Zahra Mousavi and Heshaam Faili},
  title           = {Developing the {Persian} {Wordnet} of Verbs Using
                     Supervised Learning},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {68:1--68:18},
  articleno       = {68},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3450969},
  url             = {https://dl.acm.org/doi/10.1145/3450969},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {Nowadays, wordnets are extensively used as a major
                     resource in natural language processing and information
                     retrieval tasks. Therefore, the accuracy of wordnets
                     has a direct influence on the performance of the
                     involved applications. This paper presents a \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 69 -- Arora, Agrawal.
@article{Arora:2021:SSR,
  author          = {Karunesh Kumar Arora and Shyam Sunder Agrawal},
  title           = {Source-side Reordering to Improve Machine Translation
                     between Languages with Distinct Word Orders},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {69:1--69:18},
  articleno       = {69},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3448252},
  url             = {https://dl.acm.org/doi/10.1145/3448252},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {English and Hindi have significantly different word
                     orders. English follows the subject-verb-object (SVO)
                     order, while Hindi primarily follows the
                     subject-object-verb (SOV) order. This difference poses
                     challenges to modeling this pair of languages for
                     \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 70 -- Kumari et al.
@article{Kumari:2021:RNS,
  author          = {Divya Kumari and Asif Ekbal and Rejwanul Haque and
                     Pushpak Bhattacharyya and Andy Way},
  title           = {Reinforced {NMT} for Sentiment and Content
                     Preservation in Low-resource Scenario},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {70:1--70:27},
  articleno       = {70},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3450970},
  url             = {https://dl.acm.org/doi/10.1145/3450970},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {The preservation of domain knowledge from source to
                     the target is crucial in any translation workflows.
                     Hence, translation service providers that use machine
                     translation (MT) in production could reasonably expect
                     that the translation process should \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(4), article 71 -- Sathish, Ezhumalai.
@article{Sathish:2021:ISA,
  author          = {R. Sathish and P. Ezhumalai},
  title           = {Intermodal Sentiment Analysis for Images with Text
                     Captions Using the {VGGNET} Technique},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {4},
  pages           = {71:1--71:14},
  articleno       = {71},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3450971},
  url             = {https://dl.acm.org/doi/10.1145/3450971},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Sep 14 07:03:10 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract        = {More individuals actively express their opinions and
                     attitudes in social media through advanced improvements
                     such as visual content and text captions. Sentiment
                     analysis for visuals such as images, video, and GIFs
                     has become an emerging research trend in \ldots{}},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(5), article 72e -- special-issue introduction (no abstract).
@article{Manogaran:2021:ISI,
  author          = {Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin
                     Xin},
  title           = {Introduction to the Special Issue on Deep Structured
                     Learning for Natural Language Processing, {Part 3}},
  journal         = j-TALLIP,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {20},
  number          = {5},
  pages           = {72e:1--72e:3},
  articleno       = {72e},
  month           = sep,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3476464},
  url             = {https://dl.acm.org/doi/10.1145/3476464},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
}
% TALLIP 20(5), article 72.
% The month field records the issue month. Number 4 of this volume is dated
% jul and the editorial entry for number 5 is dated sep, so this number-5
% article uses sep as well.
% NOTE(review): confirm the issue date against the publisher's issue page.
@Article{Tan:2021:RPT,
author = "Junyang Tan and Dan Xia and Shiyun Dong and Honghao
Zhu and Binshi Xu",
title = "Research On Pre-Training Method and Generalization
Ability of Big Data Recognition Model of the {Internet
of Things}",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "72:1--72:15",
month = sep,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3433539",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3433539",
abstract = "The Internet of Things and big data are currently hot
concepts and research fields. The mining,
classification, and recognition of big data in the
Internet of Things system are the key links that are
widely of concern at present. The artificial neural
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "72",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2021:SAQ,
author = "Yarong Li",
title = "Sequence Alignment with {Q}-Learning Based on the
Actor--Critic Model",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "73:1--73:7",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3433540",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3433540",
abstract = "Multiple sequence alignment methods refer to a series
of algorithmic solutions for the alignment of
evolutionary-related sequences while taking into
account evolutionary events such as mutations,
insertions, deletions, and rearrangements under certain
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "73",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Naseem:2021:CSW,
author = "Usman Naseem and Imran Razzak and Shah Khalid Khan and
Mukesh Prasad",
title = "A Comprehensive Survey on Word Representation Models:
From Classical to State-of-the-Art Word Representation
Language Models",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "74:1--74:35",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434237",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3434237",
abstract = "Word representation has always been an important
research area in the history of natural language
processing (NLP). Understanding such complex text data
is imperative, given that it is rich in information and
can be used widely across various \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "74",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ji:2021:OAP,
author = "Xiaowen Ji and Jincheng Ni",
title = "An {OT-ET} Analysis of {Polish} Singular--Plural
Pairs",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "75:1--75:12",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434238",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3434238",
abstract = "Optimality Theory (OT) and Exemplar Theory (ET) are
two enchanting theories to many scholars, but each
still faces criticism and remaining persistent
problems. Application of both theories to areas in
linguistics where conflicts may arise has been
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "75",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jing:2021:GGM,
author = "Weipeng Jing and Xianyang Song and Donglin Di and
Houbing Song",
title = "{geoGAT}: Graph Model Based on Attention Mechanism for
Geographic Text Classification",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "76:1--76:18",
month = sep,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434239",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3434239",
abstract = "In the area of geographic information processing,
there are few researches on geographic text
classification. However, the application of this task
in Chinese is relatively rare. In our work, we intend
to implement a method to extract text containing
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "76",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bi:2021:BDL,
author = "Mingwen Bi and Qingchuan Zhang and Min Zuo and Zelong
Xu and Qingyu Jin",
title = "Bi-directional Long Short-Term Memory Model with
Semantic Positional Attention for the Question
Answering System",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "77:1--77:13",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439800",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3439800",
abstract = "The intelligent question answering system aims to
provide quick and concise feedback on the questions of
users. Although the performance of phrase-level and
numerous attention models have been improved, the
sentence components and position information are
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "77",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Fan:2021:DNN,
author = "Xiaoqian Fan and Bowen Yang and Wenzhi Chen and
Quanfang Fan",
title = "Deep Neural Network Based Noised {Asian} Speech
Enhancement and Its Implementation on a Hearing Aid
App",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "78:1--78:14",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439797",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3439797",
abstract = "This article studies noised Asian speech enhancement
based on the deep neural network (DNN) and its
implementation on an app. We use the THCHS-30 speech
dataset and the common noise dataset in daily life as
training and testing data of the DNN. To stack
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "78",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhao:2021:MOH,
author = "Chunhe Zhao and Balaanand Muthu and P. Mohamed
Shakeel",
title = "Multi-Objective Heuristic Decision Making and
Benchmarking for Mobile Applications in {English}
Language Learning",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "79:1--79:16",
month = sep,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439799",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3439799",
abstract = "This research proposes to evaluate and analyze the
decision matrix for learner's English mobile
applications (EMAs) based on multi-objective heuristic
decision making with a view to listening, speaking,
reading, and writing. Because of the number of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "79",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Gupta:2021:TIC,
author = "Vedika Gupta and Nikita Jain and Shubham Shubham and
Agam Madan and Ankit Chaudhary and Qin Xin",
title = "Toward Integrated {CNN}-based Sentiment Analysis of
Tweets for Scarce-resource Language---{Hindi}",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "80:1--80:23",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450447",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3450447",
abstract = "Linguistic resources for commonly used languages such
as English and Mandarin Chinese are available in
abundance, hence the existing research in these
languages. However, there are languages for which
linguistic resources are scarcely available. One of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "80",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Do:2021:DVT,
author = "Phuc Do and Truong H. V. Phan and Brij B. Gupta",
title = "Developing a {Vietnamese} Tourism Question Answering
System Using Knowledge Graph and Deep Learning",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "81:1--81:18",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453651",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3453651",
abstract = "In recent years, Question Answering (QA) systems have
increasingly become very popular in many sectors. This
study aims to use a knowledge graph and deep learning
to develop a QA system for tourism in Vietnam. First,
the QA system replies to a user's \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "81",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Li:2021:REU,
author = "Meng Li",
title = "Research on Extraction of Useful Tourism Online
Reviews Based on Multimodal Feature Fusion",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "82:1--82:16",
month = sep,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453694",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3453694",
abstract = "To effectively identify the influencing factors of the
perceived usefulness of multimodal data in online
reviews of tourism products, this article explores the
optimization method of online tourism products based on
user-generated content and conducts \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "82",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sun:2021:TCA,
author = "Lin Sun and Wenzheng Xu and Jimin Liu",
title = "Two-channel Attention Mechanism Fusion Model of Stock
Price Prediction Based on {CNN-LSTM}",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "83:1--83:12",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453693",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3453693",
abstract = "Using hierarchical CNN, the company's multiple news is
characterized as three levels: sentence vectors,
chapter vectors, and enterprise sentiment vectors. By
combining the stock price data with the news lyric data
at the same time, the influence of news \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "83",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jain:2021:HCL,
author = "Praphula Kumar Jain and Vijayalakshmi Saravanan and
Rajendra Pamula",
title = "A Hybrid {CNN-LSTM}: A Deep Learning Approach for
Consumer Sentiment Analysis Using Qualitative
User-Generated Contents",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "84:1--84:15",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457206",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3457206",
abstract = "With the fastest growth of information and
communication technology (ICT), the availability of web
content on social media platforms is increasing day by
day. Sentiment analysis from online reviews drawing
researchers' attention from various organizations
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "84",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Deng:2021:CCB,
author = "Fei Deng and Timothy V. Rasinski",
title = "A Computer Corpus-Based Study of {Chinese} {EFL}
Learners' Use of Adverbial Connectors and Its
Implications for Building a Language-Based Learning
Environment",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "85:1--85:16",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457987",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3457987",
abstract = "This research adopts the methodology of corpus-based
analysis and contrastive interlanguage analysis (CIA),
using three corpora as the data source to analyze the
adverbial connectors used by Chinese EFL (English as a
foreign language) learners (i.e., \ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "85",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Deng:2021:CPC,
author = "Yongliang Deng and Hua Zhang",
title = "Configurational Path to {Chinese} Reading Stickiness
of Digital Library",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "86:1--86:18",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459092",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3459092",
abstract = "Attracting and retaining readers in an increasingly
competitive environment is an urgent problem for
digital libraries of original literature. However, few
empirical studies address online reading stickiness,
particularly the factors affecting the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "86",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Javed:2021:BSS,
author = "Abdul Rehman Javed and Saif Ur Rehman and Mohib Ullah
Khan and Mamoun Alazab and Habib Ullah Khan",
title = "{Betalogger}: Smartphone Sensor-based Side-channel
Attack Detection and Text Inference Using Language
Modeling and Dense {MultiLayer} Neural Network",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "87:1--87:17",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460392",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3460392",
abstract = "With the recent advancement of smartphone technology
in the past few years, smartphone usage has increased
on a tremendous scale due to its portability and
ability to perform many daily life tasks. As a result,
smartphones have become one of the most \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "87",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Lavanya:2021:MRS,
author = "R. Lavanya and B. Bharathi",
title = "Movie Recommendation System to Solve Data Sparsity
Using Collaborative Filtering Approach",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "88:1--88:14",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459091",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3459091",
abstract = "With the increase in numbers of multimedia
technologies around us, movies and videos on social
media and OTT platforms are growing, making it
confusing for users to decide which one to watch for.
For this, movie recommendation systems are widely used.
It \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "88",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ma:2021:IAV,
author = "Jun Ma and Hongzhi Yu and Yan Xu and Kaiying Deng",
title = "An Investigational Approach for Vowels of the {Salar}
Language Based on a Database of Speech Acoustic
Parameters",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "89:1--89:10",
month = sep,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459927",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3459927",
abstract = "According to relevant specifications, this article
divides, marks, and extracts the acquired speech
signals of the Salar language, and establishes the
speech acoustic parameter database of the Salar
language. Then, the vowels of the Salar language are
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "89",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kumar:2021:SAU,
author = "Akshi Kumar and Victor Hugo C. Albuquerque",
title = "Sentiment Analysis Using {XLM-R} Transformer and
Zero-shot Transfer Learning on Resource-poor {Indian}
Language",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "90:1--90:13",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461764",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3461764",
abstract = "Sentiment analysis on social media relies on
comprehending the natural language and using a robust
machine learning technique that learns multiple layers
of representations or features of the data and produces
state-of-the-art prediction results. The \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "90",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhou:2021:NRO,
author = "Zhou Zhou and Fangmin Li and Shuiqiao Yang",
title = "A Novel Resource Optimization Algorithm Based on
Clustering and Improved Differential Evolution Strategy
Under a Cloud Environment",
journal = j-TALLIP,
volume = "20",
number = "5",
pages = "91:1--91:15",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462761",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Oct 5 08:44:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3462761",
abstract = "Resource optimization algorithm based on clustering
and improved differential evolution strategy, as a new
global optimized algorithm, has wide applications in
language translation, language processing, document
understanding, cloud computing, and edge \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "91",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tan:2021:BBT,
author = "Minghuan Tan and Jing Jiang and Bing Tian Dai",
title = "A {BERT}-Based Two-Stage Model for {Chinese Chengyu}
Recommendation",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "92:1--92:18",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453185",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3453185",
abstract = "In Chinese, Chengyu are fixed phrases consisting of
four characters. As a type of idioms, their meanings
usually cannot be derived from their component
characters. In this article, we study the task of
recommending a Chengyu given a textual context.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "92",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Xiang:2021:RCL,
author = "Lu Xiang and Junnan Zhu and Yang Zhao and Yu Zhou and
Chengqing Zong",
title = "Robust Cross-lingual Task-oriented Dialogue",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "93:1--93:24",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457571",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3457571",
abstract = "Cross-lingual dialogue systems are increasingly
important in e-commerce and customer service due to the
rapid progress of globalization. In real-world system
deployment, machine translation (MT) services are often
used before and after the dialogue system \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "93",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Premjith:2021:DLA,
author = "B. Premjith and K. P. Soman",
title = "Deep Learning Approach for the Morphological Synthesis
in {Malayalam} and {Tamil} at the Character Level",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "94:1--94:17",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457976",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3457976",
abstract = "Morphological synthesis is one of the main components
of Machine Translation (MT) frameworks, especially when
any one or both of the source and target languages are
morphologically rich. Morphological synthesis is the
process of combining two words or two \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "94",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mundotiya:2021:LRB,
author = "Rajesh Kumar Mundotiya and Manish Kumar Singh and
Rahul Kapur and Swasti Mishra and Anil Kumar Singh",
title = "Linguistic Resources for {Bhojpuri}, {Magahi}, and
{Maithili}: Statistics about Them, Their Similarity
Estimates, and Baselines for Three Applications",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "95:1--95:37",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458250",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3458250",
abstract = "Corpus preparation for low-resource languages and for
development of human language technology to analyze or
computationally process them is a laborious task,
primarily due to the unavailability of expert linguists
who are native speakers of these \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "95",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Telemala:2021:ETL,
author = "Joseph P. Telemala and Hussein Suleman",
title = "Exploring Topic-language Preferences in Multilingual
{Swahili} Information Retrieval in {Tanzania}",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "96:1--96:30",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458671",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3458671",
abstract = "Habitual switching of languages is a common behaviour
among polyglots when searching for information on the
Web. Studies in information retrieval (IR) and
multilingual information retrieval (MLIR) suggest that
part of the reason for such regular switching
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "96",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tian:2021:RRO,
author = "Yaolin Tian and Weize Gao and Xuxing Liu and Shanxiong
Chen and Bofeng Mo",
title = "The Research on Rejoining of the Oracle Bone Rubbings
Based on Curve Matching",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "97:1--97:17",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460393",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3460393",
abstract = "The rejoining of oracle bone rubbings is a fundamental
topic for oracle research. However, it is a tough task
to reassemble severely broken oracle bone rubbings
because of detail loss in manual labeling, the great
time consumption of rejoining, and the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "97",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Munir:2021:NUS,
author = "Kashif Munir and Hai Zhao and Zuchao Li",
title = "Neural Unsupervised Semantic Role Labeling",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "98:1--98:16",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461613",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3461613",
abstract = "The task of semantic role labeling (SRL) is dedicated
to finding the predicate-argument structure. Previous
works on SRL are mostly supervised and do not consider
the difficulty in labeling each example which can be
very expensive and time-consuming. In \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "98",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Saha:2021:UDM,
author = "Tulika Saha and Dhawal Gupta and Sriparna Saha and
Pushpak Bhattacharyya",
title = "A Unified Dialogue Management Strategy for
Multi-intent Dialogue Conversations in Multiple
Languages",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "99:1--99:22",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461763",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3461763",
abstract = "Building Virtual Agents capable of carrying out
complex queries of the user involving multiple intents
of a domain is quite a challenge, because it demands
that the agent manages several subtasks simultaneously.
This article presents a universal Deep \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "99",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ni:2021:MDT,
author = "Weijian Ni and Tong Liu and Qingtian Zeng and Nengfu
Xie",
title = "Mining Domain Terminologies Using Search Engine's
Query Log",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "100:1--100:32",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462327",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3462327",
abstract = "Domain terminologies are a basic resource for various
natural language processing tasks. To automatically
discover terminologies for a domain of interest, most
traditional approaches mostly rely on a domain-specific
corpus given in advance; thus, the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "100",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Xu:2021:CDG,
author = "Jun Xu and Zeyang Lei and Haifeng Wang and Zheng-Yu
Niu and Hua Wu and Wanxiang Che and Jizhou Huang and
Ting Liu",
title = "Coherent Dialog Generation with Query Graph",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "101:1--101:23",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462551",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3462551",
abstract = "Learning to generate coherent and informative dialogs
is an enduring challenge for open-domain conversation
generation. Previous work leverage knowledge graph or
documents to facilitate informative dialog generation,
with little attention on dialog \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "101",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Joshi:2021:SSG,
author = "Manju Lata Joshi and Nisheeth Joshi and Namita
Mittal",
title = "{SGATS}: Semantic Graph-based Automatic Text
Summarization from {Hindi} Text Documents",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "102:1--102:32",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464381",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464381",
abstract = "Creating a coherent summary of the text is a
challenging task in the field of Natural Language
Processing (NLP). Various Automatic Text Summarization
techniques have been developed for abstractive as well
as extractive summarization. This study focuses on
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "102",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Byambadorj:2021:NTM,
author = "Zolzaya Byambadorj and Ryota Nishimura and Altangerel
Ayush and Norihide Kitaoka",
title = "Normalization of Transliterated {Mongolian} Words
Using {Seq2Seq} Model with Limited Data",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "103:1--103:19",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464361",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464361",
abstract = "The huge increase in social media use in recent years
has resulted in new forms of social interaction,
changing our daily lives. Due to increasing contact
between people from different cultures as a result of
globalization, there has also been an increase
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "103",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kaing:2021:TTP,
author = "Hour Kaing and Chenchen Ding and Masao Utiyama and
Eiichiro Sumita and Sethserey Sam and Sopheap Seng and
Katsuhito Sudoh and Satoshi Nakamura",
title = "Towards Tokenization and Part-of-Speech Tagging for
{Khmer}: Data and Discussion",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "104:1--104:16",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464378",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464378",
abstract = "As a highly analytic language, Khmer has considerable
ambiguities in tokenization and part-of-speech (POS)
tagging processing. This topic is investigated in this
study. Specifically, a 20,000-sentence Khmer corpus
with manual tokenization and POS-tagging \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "104",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Tian:2021:NCM,
author = "Xiuxia Tian and Can Li and Bo Zhao",
title = "A Novel Classification Model {SA-MPCNN} for Power
Equipment Defect Text",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "105:1--105:21",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464380",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464380",
abstract = "The text classification of power equipment defect is
of great significance to equipment health condition
evaluation and power equipment maintenance decisions.
Most of the existing classification methods do not
sufficiently consider the semantic relation \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "105",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sen:2021:BGT,
author = "Shibaprasad Sen and Ankan Bhattacharyya and Ram Sarkar
and Kaushik Roy",
title = "{BYANJON}: a Ground Truth Preparation System for
Online Handwritten {Bangla} Documents",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "106:1--106:16",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464379",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464379",
abstract = "The work reported in this article deals with the
ground truth generation scheme for online handwritten
Bangla documents at text-line, word, and stroke levels.
The aim of the proposed scheme is twofold: firstly, to
build a document level database so that \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "106",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Maimaiti:2021:IDA,
author = "Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan
and Zegao Pan and Maosong Sun",
title = "Improving Data Augmentation for Low-Resource {NMT}
Guided by {POS}-Tagging and Paraphrase Embedding",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "107:1--107:21",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464427",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464427",
abstract = "Data augmentation is an approach for several text
generation tasks. Generally, in the machine translation
paradigm, mainly in low-resource language scenarios,
many data augmentation methods have been proposed. The
most used approaches for generating \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "107",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Vo:2021:SIS,
author = "Tham Vo",
title = "{SE4ExSum}: an Integrated Semantic-aware Neural
Approach with Graph Convolutional Network for
Extractive Text Summarization",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "108:1--108:22",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464426",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464426",
abstract = "Recently, advanced techniques in deep learning such as
recurrent neural network (GRU, LSTM and Bi-LSTM) and
auto-encoding (attention-based transformer and BERT)
have achieved great successes in multiple application
domains including text summarization. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "108",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Liu:2021:MCS,
author = "Lei Liu and Hao Chen and Yinghong Sun",
title = "A Multi-Classification Sentiment Analysis Model of
{Chinese} Short Text Based on Gated Linear Units and
Attention Mechanism",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "109:1--109:13",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464425",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464425",
abstract = "Sentiment analysis of social media texts has become a
research hotspot in information processing. Sentiment
analysis methods based on the combination of machine
learning and sentiment lexicon need to select features.
Selected emotional features are often \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "109",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Niwa:2021:CCR,
author = "Ayana Niwa and Naoaki Okazaki and Kohei Wakimoto and
Keisuke Nishiguchi and Masataka Mouri",
title = "Construction of a Corpus of Rhetorical Devices in
Slogans and Structural Analysis of Antitheses",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "110:1--110:26",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465218",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3465218",
abstract = "An advertising slogan is a sentence that expresses a
product or a work of art in a straightforward manner
and is used for advertising and publicity. Moving the
consumer's mind and attracting their interest can
significantly influence sales. Although \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "110",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Shin:2021:EEA,
author = "Jaehun Shin and Wonkee Lee and Byung-Hyun Go and
Baikjin Jung and Youngkil Kim and Jong-Hyeok Lee",
title = "Exploration of Effective Attention Strategies for
Neural Automatic Post-editing with Transformer",
journal = j-TALLIP,
volume = "20",
number = "6",
pages = "111:1--111:17",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465383",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Sat Oct 16 05:29:47 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3465383",
abstract = "Automatic post-editing (APE) is the study of
correcting translation errors in the output of an
unknown machine translation (MT) system and has been
considered as a method of improving translation quality
without any modification to conventional MT \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "111",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Kumar:2022:ISI,
author = "Akshi Kumar and Christian Esposito and Dimitrios A.
Karras",
title = "Introduction to Special Issue on Misinformation, Fake
News and Rumor Detection in Low-Resource Languages",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "1e:1--1e:3",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3505588",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3505588",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1e",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Sangwan:2022:DCD,
author = "Saurabh R. Sangwan and M. P. S. Bhatia",
title = "Denigrate Comment Detection in Low-Resource {Hindi}
Language Using Attention-Based Residual Networks",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "1:1--1:14",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431729",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3431729",
abstract = "Cyberspace has been recognized as a conducive
environment for use of various hostile, direct, and
indirect behavioural tactics to target individuals or
groups. Denigration is one of the most frequently used
cyberbullying ploys to actively damage, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "1",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Bhowmick:2022:MDF,
author = "Rajat Subhra Bhowmick and Isha Ganguli and Jayanta
Paul and Jaya Sil",
title = "A Multimodal Deep Framework for Derogatory Social
Media Post Identification of a Recognized Person",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "2:1--2:19",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447651",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447651",
abstract = "In today's era of digitization, social media platforms
play a significant role in networking and influencing
the perception of the general population. Social
network sites have recently been used to carry out
harmful attacks against individuals, including
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "2",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jain:2022:FNC,
author = "Rachna Jain and Deepak Kumar Jain and Dharana and
Nitika Sharma",
title = "Fake News Classification: a Quantitative Research
Description",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "3:1--3:17",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447650",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3447650",
abstract = "Social media can render content circulating to reach
millions with a knack to influence people, despite the
questionable authenticity of the facts. Internet sources
are the most convenient and easy approach to obtain any
information these days. Fake news \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "3",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Ranasinghe:2022:MOL,
author = "Tharindu Ranasinghe and Marcos Zampieri",
title = "Multilingual Offensive Language Identification for
Low-resource Languages",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "4:1--4:13",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457610",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3457610",
abstract = "Offensive content is pervasive in social media and a
reason for concern to companies and government
organizations. Several studies have been recently
published investigating methods to detect the various
forms of such content (e.g., hate speech, \ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "4",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Das:2022:DLA,
author = "Soma Das and Pooja Rai and Sanjay Chatterji",
title = "Deep Level Analysis of Legitimacy in {Bengali} News
Sentences",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "5:1--5:18",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459928",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3459928",
abstract = "The tremendous increase in the growth of
misinformation in news articles has the potential
threat for the adverse effects on society. Hence, the
detection of misinformation in news data has become an
appealing research area. The task of annotating and
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "5",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Saeed:2022:ECE,
author = "Ramsha Saeed and Hammad Afzal and Haider Abbas and
Maheen Fatima",
title = "Enriching Conventional Ensemble Learner with Deep
Contextual Semantics to Detect Fake News in {Urdu}",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "6:1--6:19",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461614",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3461614",
abstract = "Increased connectivity has contributed greatly in
facilitating rapid access to information and reliable
communication. However, the uncontrolled information
dissemination has also resulted in the spread of fake
news. Fake news might be spread by a group \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "6",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Gumaei:2022:EAR,
author = "Abdu Gumaei and Mabrook S. Al-Rakhami and Mohammad
Mehedi Hassan and Victor Hugo C. {De Albuquerque} and
David Camacho",
title = "An Effective Approach for Rumor Detection of {Arabic}
Tweets Using {eXtreme} Gradient Boosting Method",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "7:1--7:16",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461697",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3461697",
abstract = "Twitter is currently one of the most popular
microblogging platforms allowing people to post short
messages, news, thoughts, and so on. The Twitter user
community is growing very fast. It has an average of
328 million active accounts today, making it one
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "7",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Dhall:2022:BBF,
author = "Sakshi Dhall and Ashutosh Dhar Dwivedi and Saibal K.
Pal and Gautam Srivastava",
title = "Blockchain-based Framework for Reducing Fake or
Vicious News Spread on Social Media\slash Messaging
Platforms",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "8:1--8:33",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3467019",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3467019",
abstract = "With social media becoming the most frequently used
mode of modern-day communications, the propagation of
fake or vicious news through such modes of
communication has emerged as a serious problem. The
scope of the problem of fake or vicious news may range
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "8",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{De:2022:TBA,
author = "Arkadipta De and Dibyanayan Bandyopadhyay and Baban
Gain and Asif Ekbal",
title = "A Transformer-Based Approach to Multilingual Fake News
Detection in Low-Resource Languages",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "9:1--9:20",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3472619",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3472619",
abstract = "Fake news classification is one of the most
interesting problems that has attracted huge attention
to the researchers of artificial intelligence, natural
language processing, and machine learning (ML). Most of
the current works on fake news detection are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "9",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Samadi:2022:PFN,
author = "Mohammadreza Samadi and Maryam Mousavian and Saeedeh
Momtazi",
title = "{Persian} Fake News Detection: Neural Representation
and Classification at Word and Text Levels",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "10:1--10:11",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3472620",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3472620",
abstract = "Nowadays, broadcasting news on social media and
websites has grown at a swifter pace, which has had
negative impacts on both the general public and
governments; hence, this has urged us to build a fake
news detection system. Contextualized word embeddings
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "10",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Najadat:2022:DAS,
author = "Hassan Najadat and Mohammad A. Alzubaidi and Islam
Qarqaz",
title = "Detecting {Arabic} Spam Reviews in Social Networks
Based on Classification Algorithms",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "11:1--11:13",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3476115",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3476115",
abstract = "Reviews or comments that users leave on social media
have great importance for companies and business
entities. New product ideas can be evaluated based on
customer reactions. However, this use of social media
is complicated by those who post spam on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "11",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Jahanbakhsh-Nagadeh:2022:DCB,
author = "Zoleikha Jahanbakhsh-Nagadeh and Mohammad-Reza
Feizi-Derakhshi and Arash Sharifi",
title = "A Deep Content-Based Model for {Persian} Rumor
Verification",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "12:1--12:29",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487289",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3487289",
abstract = "During the development of social media, there has been
a transformation in social communication. Despite their
positive applications in social interactions and news
spread, it also provides an ideal platform for
spreading rumors. Rumors can endanger the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "12",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Alam:2022:RUP,
author = "Mehreen Alam and Sibt {Ul Hussain}",
title = "{Roman-Urdu-Parl}: {Roman-Urdu} and {Urdu} Parallel
Corpus for {Urdu} Language Understanding",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "13:1--13:20",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464424",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3464424",
abstract = "Availability of corpora is a basic requirement for
conducting research in a particular language.
Unfortunately, for a morphologically rich language like
Urdu, despite being used by over a 100 million people
around the globe, the dearth of corpora is a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "13",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Nassif:2022:EES,
author = "Ali Bou Nassif and Abdollah Masoud Darya and Ashraf
Elnagar",
title = "Empirical Evaluation of Shallow and Deep Learning
Classifiers for {Arabic} Sentiment Analysis",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "14:1--14:25",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466171",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3466171",
abstract = "This work presents a detailed comparison of the
performance of deep learning models such as
convolutional neural networks, long short-term memory,
gated recurrent units, their hybrids, and a selection
of shallow learning classifiers for sentiment analysis
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "14",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Phukon:2022:SEU,
author = "Bornali Phukon and Akash Anil and Sanasam Ranbir Singh
and Priyankoo Sarmah",
title = "Synonymy Expansion Using Link Prediction Methods: a
Case Study of {Assamese} {WordNet}",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "15:1--15:21",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3467966",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3467966",
abstract = "WordNets built for low-resource languages, such as
Assamese, often use the expansion methodology. This may
result in missing lexical entries and missing synonymy
relations. As the Assamese WordNet is also built using
the expansion method, using the Hindi \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "15",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Eddine:2022:NCE,
author = "Meftah Mohammed Charaf Eddine",
title = "A New Concept of Electronic Text Based on Semantic
Coding System for Machine Translation",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "16:1--16:16",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3469655",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3469655",
abstract = "In the field of machine translation of texts, the
ambiguity in both lexical (dictionary) and structural
aspects is still one of the difficult problems.
Researchers in this field use different approaches, the
most important of which is machine learning in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "16",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Xiang:2022:EGN,
author = "Yan Xiang and Zhengtao Yu and Junjun Guo and Yuxin
Huang and Yantuan Xian",
title = "Event Graph Neural Network for Opinion Target
Classification of Microblog Comments",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "17:1--17:13",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3469725",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3469725",
abstract = "Opinion target classification of microblog comments is
one of the most important tasks for public opinion
analysis about an event. Due to the high cost of manual
labeling, opinion target classification is generally
considered as a weak-supervised task. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "17",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Turan:2022:CIA,
author = "Erhan Turan and Umut Orhan",
title = "Confidence Indexing of Automated Detected Synsets: a
Case Study on Contemporary {Turkish} Dictionary",
journal = j-TALLIP,
volume = "21",
number = "1",
pages = "18:1--18:19",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3469724",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Mon Jan 31 07:33:24 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3469724",
abstract = "In this study, a novel confidence indexing algorithm
is proposed to minimize human labor in controlling the
reliability of automatically extracted synsets from a
non-machine-readable monolingual dictionary.
Contemporary Turkish Dictionary of Turkish \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "18",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Baruah:2022:LRN,
  author =       "Rupjyoti Baruah and Rajesh Kumar Mundotiya and Anil
                 Kumar Singh",
  title =        "Low Resource Neural Machine Translation: {Assamese}
                 to\slash from Other {Indo--Aryan} ({Indic}) Languages",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "1",
  pages =        "19:1--19:32",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3469721",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Jan 31 07:33:24 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3469721",
  abstract =     "Machine translation (MT) systems have been built using
                 numerous different techniques for bridging the language
                 barriers. These techniques are broadly categorized into
                 approaches like Statistical Machine Translation (SMT)
                 and Neural Machine Translation (\ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Fadel:2022:NAT,
  author =       "Ali Fadel and Ibraheem Tuffaha and Mahmoud Al-Ayyoub",
  title =        "Neural {Arabic} Text Diacritization: State-of-the-Art
                 Results and a Novel Approach for {Arabic} {NLP}
                 Downstream Tasks",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "1",
  pages =        "20:1--20:25",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3470849",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Jan 31 07:33:24 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3470849",
  abstract =     "In this work, we present several deep learning models
                 for the automatic diacritization of Arabic text. Our
                 models are built using two main approaches, viz.
                 Feed-Forward Neural Network (FFNN) and Recurrent Neural
                 Network (RNN), with several enhancements \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Kulkarni:2022:SAH,
  author =       "Dhanashree S. Kulkarni and Sunil S. Rodd",
  title =        "Sentiment Analysis in {Hindi} --- a Survey on the
                 State-of-the-art Techniques",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "1",
  pages =        "21:1--21:46",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3469722",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Jan 31 07:33:24 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3469722",
  abstract =     "Sentiment Analysis (SA) has been a core interest in
                 the field of text mining research, dealing with
                 computational processing of sentiments, views, and
                 subjective nature of the text. Due to the availability
                 of extensive web-based data in Indian languages
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yu:2022:ICV,
  author =       "Zhiqiang Yu and Zhengtao Yu and Yantuan Xian and Yuxin
                 Huang and Junjun Guo",
  title =        "Improving {Chinese--Vietnamese} Neural Machine
                 Translation with Linguistic Differences",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "22:1--22:12",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3477536",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477536",
  abstract =     "We present a simple, efficient data augmentation
                 approach for boosting Chinese-Vietnamese neural machine
                 translation performance by leveraging the linguistic
                 difference between the two languages. We first define
                 the formalized representation of modifier \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Halabi:2022:INA,
  author =       "Dana Halabi and Ebaa Fayyoumi and Arafat Awajan",
  title =        "{I3rab}: a New {Arabic} Dependency Treebank Based on
                 {Arabic} Grammatical Theory",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "23:1--23:32",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3472295",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3472295",
  abstract =     "Treebanks are valuable linguistic resources that
                 include the syntactic structure of a language sentence
                 in addition to part-of-speech tags and morphological
                 features. They are mainly utilized in modeling
                 statistical parsers. Although the statistical
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Yang:2022:ASN,
  author =       "Haitong Yang and Guangyou Zhou and Tingting He",
  title =        "Adversarial Separation Network for Text Style
                 Transfer",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "24:1--24:14",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3472621",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3472621",
  abstract =     "This article considers the task of text style
                 transfer: transforming a specific style of sentence
                 into another while preserving its style-independent
                 content. A dominate approach to text style transfer is
                 to learn a good content factor of text, define a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Fatima:2022:DCL,
  author =       "Ghazeefa Fatima and Rao Muhammad Adeel Nawab and
                 Muhammad Salman Khan and Ali Saeed",
  title =        "Developing a Cross-lingual Semantic Word Similarity
                 Corpus for {English--Urdu} Language Pair",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "25:1--25:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3472618",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3472618",
  abstract =     "Semantic word similarity is a quantitative measure of
                 how much two words are contextually similar. Evaluation
                 of semantic word similarity models requires a benchmark
                 corpus. However, despite the millions of speakers and
                 the large digital text of the Urdu \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
%%% The language pair in the title is set with an en dash
%%% ({English--Urdu}), as in the other language-pair titles of this
%%% file ({English--Urdu}, {Chinese--Vietnamese}, {Indo--Aryan}).
@Article{Muneer:2022:CLT,
  author =       "Iqra Muneer and Rao Muhammad Adeel Nawab",
  title =        "Cross-lingual Text Reuse Detection Using Translation
                 Plus Monolingual Analysis for {English--Urdu} Language
                 Pair",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "26:1--26:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473331",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473331",
  abstract =     "Cross-Lingual Text Reuse Detection (CLTRD) has
                 recently attracted the attention of the research
                 community due to a large amount of digital text readily
                 available for reuse in multiple languages through
                 online digital repositories. In addition, efficient
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Xu:2022:LRL,
  author =       "Fan Xu and Yangjie Dan and Keyu Yan and Yong Ma and
                 Mingwen Wang",
  title =        "Low-Resource Language Discrimination toward {Chinese}
                 Dialects with Transfer Learning and Data Augmentation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "27:1--27:21",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473499",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473499",
  abstract =     "Chinese dialects discrimination is a challenging
                 natural language processing task due to scarce
                 annotation resource. In this article, we develop a
                 novel Chinese dialects discrimination framework with
                 transfer learning and data augmentation (CDDTLDA) in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Rana:2022:UAS,
  author =       "Toqir A. Rana and Kiran Shahzadi and Tauseef Rana and
                 Ahsan Arshad and Mohammad Tubishat",
  title =        "An Unsupervised Approach for Sentiment Analysis on
                 Social Media Short Text Classification in {Roman
                 Urdu}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "28:1--28:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474119",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474119",
  abstract =     "During the last two decades, sentiment analysis, also
                 known as opinion mining, has become one of the most
                 explored research areas in Natural Language Processing
                 (NLP) and data mining. Sentiment analysis focuses on
                 the sentiments or opinions of consumers \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Mukherjee:2022:URL,
  author =       "Jayati Mukherjee and Swapan K. Parui and Utpal Roy",
  title =        "An Unsupervised and Robust Line and Word Segmentation
                 Method for Handwritten and Degraded Printed Document",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "29:1--29:31",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474118",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474118",
  abstract =     "Segmentation of text lines and words in an
                 unconstrained handwritten or a machine-printed degraded
                 document is a challenging document analysis problem due
                 to the heterogeneity in the document structure. Often
                 there is un-even skew between the lines and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Mukta:2022:CGB,
  author =       "Md. Saddam Hossain Mukta and Md. Adnanul Islam and
                 Faisal Ahamed Khan and Afjal Hossain and Shuvanon Razik
                 and Shazzad Hossain and Jalal Mahmud",
  title =        "A Comprehensive Guideline for {Bengali} Sentiment
                 Annotation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "30:1--30:19",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474363",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474363",
  abstract =     "Sentiment Analysis (SA) is a Natural Language
                 Processing (NLP) and an Information Extraction (IE)
                 task that primarily aims to obtain the writer's
                 feelings expressed in positive or negative by analyzing
                 a large number of documents. SA is also widely
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Hu:2022:GFQ,
  author =       "Yue Hu and Haitong Yang and Guangyou Zhou and Jimmy
                 Xiangji Huang",
  title =        "Generating Factoid Questions with Question Type
                 Enhanced Representation and Attention-based Copy
                 Mechanism",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "31:1--31:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474555",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474555",
  abstract =     "Question generation over knowledge bases is an
                 important research topic. How to deal with rare and
                 low-frequency words in traditional generation models is
                 a key challenge for question generation. Although the
                 copy mechanism provides significant \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Singh:2022:CSH,
  author =       "Pawan Kumar Singh and Ram Sarkar and Ajith Abraham and
                 Mita Nasipuri",
  title =        "A Case Study on Handwritten {Indic} Script
                 Classification: Benchmarking of the Results at Page,
                 Block, Text-line, and Word Levels",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "32:1--32:36",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476102",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476102",
  abstract =     "Handwritten script classification is still considered
                 as a challenging research problem in the domain of
                 document image analysis. Although some research
                 attempts have been made by the researchers for solving
                 the challenging issues, a comprehensive \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Gu:2022:MTF,
  author =       "Xiaoqing Gu and Kaijian Xia and Yizhang Jiang and
                 Alireza Jolfaei",
  title =        "Multi-task Fuzzy Clustering-Based Multi-task {TSK}
                 Fuzzy System for Text Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "33:1--33:24",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476103",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476103",
  abstract =     "Text sentiment classification is an important
                 technology for natural language processing. A fuzzy
                 system is a strong tool for processing imprecise or
                 ambiguous data, and it can be used for text sentiment
                 analysis. This article proposes a new formulation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Sarwar:2022:UWU,
  author =       "Raheem Sarwar and Saeed-Ul Hassan",
  title =        "{UrduAI}: Writeprints for {Urdu} Authorship
                 Identification",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "34:1--34:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476467",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476467",
  abstract =     "The authorship identification task aims at identifying
                 the original author of an anonymous text sample from a
                 set of candidate authors. It has several application
                 domains such as digital text forensics and information
                 retrieval. These application domains \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Fatima:2022:SAW,
  author =       "Tayyaba Fatima and Raees {Ul Islam} and Muhammad Waqas
                 Anwar and M. Hasan Jamal and M. Tayyab Chaudhry and
                 Zeeshan Gillani",
  title =        "{STEMUR}: an Automated Word Conflation Algorithm for
                 the {Urdu} Language",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "35:1--35:20",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476226",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476226",
  abstract =     "Stemming is a common word conflation method that
                 perceives stems embedded in the words and decreases
                 them to their stem (root) by conflating all the
                 morphologically related terms into a single term,
                 without doing a complete morphological analysis. This
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Hiraoka:2022:RNH,
  author =       "Tatsuya Hiraoka and Sho Takase and Kei Uchiumi and
                 Atsushi Keyaki and Naoaki Okazaki",
  title =        "Recurrent Neural Hidden {Markov} Model for High-order
                 Transition",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "36:1--36:15",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476511",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476511",
  abstract =     "We propose a method to pay attention to high-order
                 relations among latent states to improve the
                 conventional HMMs that focus only on the latest latent
                 state, since they assume Markov property. To address
                 the high-order relations, we apply an RNN to each
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{S:2022:IWS,
  author =       "Sruthi S. and B. Kannan and Binu Paul",
  title =        "Improved Word Sense Determination in {Malayalam} using
                 Latent {Dirichlet} Allocation and Semantic Features",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "37:1--37:11",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476978",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476978",
  abstract =     "Recent years have witnessed phenomenal developments
                 worldwide in the field of NLP. But developments in
                 Indian regional languages are very few compared to
                 them. This work is a step towards the construction of a
                 target word sense disambiguation system in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Saeed:2022:IFD,
  author =       "Ali Saeed and Rao Muhammad Adeel Nawab and Mark
                 Stevenson",
  title =        "Investigating the Feasibility of Deep Learning Methods
                 for {Urdu} Word Sense Disambiguation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "38:1--38:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3477578",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477578",
  abstract =     "Word Sense Disambiguation (WSD), the process of
                 automatically identifying the correct meaning of a word
                 used in a given context, is a significant challenge in
                 Natural Language Processing. A range of approaches to
                 the problem has been explored by the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Katyayan:2022:DAR,
  author =       "Pragya Katyayan and Nisheeth Joshi",
  title =        "Development of Automatic Rule-based Semantic Tagger
                 and {Karaka} Analyzer for {Hindi}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "39:1--39:25",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479155",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479155",
  abstract =     "Hindi is the third most-spoken language in the world
                 (615 million speakers) and has the fourth highest
                 native speakers (341 million). It is an inflectionally
                 rich and relatively free word-order language with an
                 immense vocabulary set. Despite being such a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Abbad:2022:SED,
  author =       "Hamza Abbad and Shengwu Xiong",
  title =        "Simple Extensible Deep Learning Model for Automatic
                 {Arabic} Diacritization",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "40:1--40:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3480938",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480938",
  abstract =     "Automatic diacritization is an Arabic natural language
                 processing topic based on the sequence labeling task
                 where the labels are the diacritics and the letters are
                 the sequence elements. A letter can have from zero up
                 to two diacritics. The dataset used \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Huang:2022:DAW,
  author =       "Kaiyu Huang and Keli Xiao and Fengran Mo and Bo Jin
                 and Zhuang Liu and Degen Huang",
  title =        "Domain-Aware Word Segmentation for {Chinese} Language:
                 a Document-Level Context-Aware Model",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "41:1--41:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481298",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481298",
  abstract =     "Word segmentation is an essential and challenging task
                 in natural language processing, especially for the
                 Chinese language due to its high linguistic complexity.
                 Existing methods for Chinese word segmentation,
                 including statistical machine learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Bai:2022:UPT,
  author =       "Guirong Bai and Shizhu He and Kang Liu and Jun Zhao",
  title =        "Using Pre-trained Language Model to Enhance Active
                 Learning for Sentence Matching",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "42:1--42:19",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3480937",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480937",
  abstract =     "Active learning is an effective method to
                 substantially alleviate the problem of expensive
                 annotation cost for data-driven models. Recently,
                 pre-trained language models have been demonstrated to
                 be powerful for learning language representations. In
                 this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Al-Shatnawi:2022:AHW,
  author =       "Atallah Mahmoud Al-Shatnawi and Faisal Al-Saqqar and
                 Alireza Souri",
  title =        "{Arabic} Handwritten Word Recognition Based on
                 Stationary Wavelet Transform Technique using Machine
                 Learning",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "43:1--43:21",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474391",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474391",
  abstract =     "This paper is aimed at improving the performance of
                 the word recognition system (WRS) of handwritten Arabic
                 text by extracting features in the frequency domain
                 using the Stationary Wavelet Transform (SWT) method
                 using machine learning, which is a wavelet \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Nazir:2022:AAR,
  author =       "Zulqarnain Nazir and Khurram Shahzad and Muhammad
                 Kamran Malik and Waheed Anwar and Imran Sarwar Bajwa
                 and Khawar Mehmood",
  title =        "Authorship Attribution for a Resource Poor Language
                 --- {Urdu}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "44:1--44:23",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487061",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487061",
  abstract =     "Authorship attribution refers to examining the writing
                 style of authors to determine the likelihood of the
                 original author of a document from a given set of
                 potential authors. Due to the wide range of authorship
                 attribution applications, a plethora of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Sheikhaei:2022:JTL,
  author =       "Mohammad Sadegh Sheikhaei and Hasan Zafari and Yuan
                 Tian",
  title =        "Joined Type Length Encoding for Nested Named Entity
                 Recognition",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "45:1--45:23",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487057",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487057",
  abstract =     "In this article, we propose a new encoding scheme for
                 named entity recognition (NER) called Joined
                 Type-Length encoding (JoinedTL). Unlike most existing
                 named entity encoding schemes, which focus on flat
                 entities, JoinedTL can label nested named entities
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Li:2022:DVC,
  author =       "Mei Li and Jiajun Zhang and Xiang Lu and Chengqing
                 Zong",
  title =        "Dual-View Conditional Variational Auto-Encoder for
                 Emotional Dialogue Generation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "46:1--46:18",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481890",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481890",
  abstract =     "Emotional dialogue generation aims to generate
                 appropriate responses that are content relevant with
                 the query and emotion consistent with the given emotion
                 tag. Previous work mainly focuses on incorporating
                 emotion information into the sequence to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Raval:2022:IDL,
  author =       "Deepang Raval and Vyom Pathak and Muktan Patel and
                 Brijesh Bhatt",
  title =        "Improving Deep Learning based Automatic Speech
                 Recognition for {Gujarati}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "47:1--47:18",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483446",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483446",
  abstract =     "We present a novel approach for improving the
                 performance of an End-to-End speech recognition system
                 for the Gujarati language. We follow a deep
                 learning-based approach that includes Convolutional
                 Neural Network, Bi-directional Long Short Term Memory
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}
@Article{Jiang:2022:TTD,
  author          = {Shu Jiang and Zuchao Li and Hai Zhao and
                     Bao-Liang Lu and Rui Wang},
  title           = {Tri-training for Dependency Parsing Domain Adaptation},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {48:1--48:17},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3488367},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3488367},
  abstract        = {In recent years, the research on dependency parsing
                     focuses on improving the accuracy of the domain-specific
                     (in-domain) test datasets and has made remarkable
                     progress. However, there are innumerable scenarios in
                     the real world that are not covered by the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {48},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Mishra:2022:ECA,
  author          = {Santosh Kumar Mishra and Gaurav Rai and
                     Sriparna Saha and Pushpak Bhattacharyya},
  title           = {Efficient Channel Attention Based Encoder-Decoder
                     Approach for Image Captioning in {Hindi}},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {49:1--49:17},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3483597},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3483597},
  abstract        = {Image captioning refers to the process of generating
                     a textual description that describes objects and
                     activities present in a given image. It connects two
                     fields of artificial intelligence, computer vision, and
                     natural language processing. Computer vision \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Liao:2022:SLM,
  author          = {Xianwen Liao and Yongzhong Huang and Peng Yang
                     and Lei Chen},
  title           = {A Statistical Language Model for Pre-Trained
                     Sequence Labeling: a Case Study on {Vietnamese}},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {50:1--50:21},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3483524},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3483524},
  abstract        = {By defining the computable word segmentation unit
                     and studying its probability characteristics, we
                     establish an unsupervised statistical language model
                     (SLM) for a new pre-trained sequence labeling framework
                     in this article. The proposed SLM is an \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {50},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Wang:2022:TCC,
  author          = {Zhongguo Wang and Bao Zhang},
  title           = {Toxic Comment Classification Based on
                     Bidirectional Gated Recurrent Unit and
                     Convolutional Neural Network},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {51:1--51:12},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3488366},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3488366},
  abstract        = {For English toxic comment classification, this paper
                     presents the model that combines Bi-GRU and CNN optimized
                     by global average pooling (BG-GCNN) based on the
                     bidirectional gated recurrent unit (Bi-GRU) and global
                     pooling optimized convolution neural \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {51},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Sun:2022:OSR,
  author          = {Jian Sun and Yu Zhou and Chengqing Zong},
  title           = {One-Shot Relation Learning for Knowledge Graphs
                     via Neighborhood Aggregation and Paths Encoding},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {52:1--52:19},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3484729},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3484729},
  abstract        = {The relation learning between two entities is an
                     essential task in knowledge graph (KG) completion
                     that has received much attention recently. Previous
                     work almost exclusively focused on relations widely
                     seen in the original KGs, which means that enough
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {52},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Munir:2022:MAI,
  author          = {Kashif Munir and Hongxiao Bai and Hai Zhao and
                     Junhan Zhao},
  title           = {Memorizing All for Implicit Discourse Relation Recognition},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {53:1--53:20},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3485016},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3485016},
  abstract        = {Implicit discourse relation recognition is a challenging
                     task due to the absence of the necessary informative
                     clues from explicit connectives. An implicit discourse
                     relation recognizer has to carefully tackle the semantic
                     similarity of sentence pairs and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {53},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
%%% TALLIP 21(3), article 54.  The second author is entered as
%%% `V. Vijayarajan' so that BibTeX's First-Last parsing takes
%%% Vijayarajan, not the trailing initial `V.', as the surname.
%%% NOTE(review): assumes `V.' is an initial rather than a family
%%% name; confirm against the publisher's author record.
@Article{Reddy:2022:FBA,
author = "A. Pramod Reddy and V. Vijayarajan",
title = "Fusion Based {AER} System Using Deep Learning Approach
for Amplitude and Frequency Analysis",
journal = j-TALLIP,
volume = "21",
number = "3",
pages = "54:1--54:19",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488369",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Apr 5 06:29:03 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3488369",
abstract = "Automatic emotion recognition from Speech (AERS)
systems based on acoustical analysis reveal that some
emotional classes persist with ambiguity. This study
employed an alternative method aimed at providing deep
understanding into the amplitude-frequency, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "54",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Zhang:2022:LJE,
  author          = {Hu Zhang and Bangze Pan and Ru Li},
  title           = {Legal Judgment Elements Extraction Approach
                     with Law Article-aware Mechanism},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {55:1--55:15},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3485244},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3485244},
  abstract        = {Legal judgment elements extraction (LJEE) aims
                     to identify the different judgment features from the
                     fact description in legal documents automatically,
                     which helps to improve the accuracy and interpretability
                     of the judgment results. In real court rulings,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {55},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Rani:2022:ABS,
  author          = {Sujata Rani and Parteek Kumar},
  title           = {Aspect-based Sentiment Analysis using Dependency Parsing},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {56:1--56:19},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3485243},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3485243},
  abstract        = {In this paper, an aspect-based Sentiment Analysis
                     (SA) system for Hindi is presented. The proposed
                     system assigns a separate sentiment towards the
                     different aspects of a sentence as well as it
                     evaluates the overall sentiment expressed in a
                     sentence. In \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {56},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
%%% TALLIP 21(3), article 57.  The title spells `Indo-Aryan' with a
%%% plain hyphen (a prefixed compound, and the spelling used in this
%%% entry's own abstract) rather than an en dash.
@Article{Ishraq:2022:TDU,
author = "Mir Ragib Ishraq and Nitesh Khadka and Asif Mohammed
Samir and M. Shahidur Rahman",
title = "Towards Developing Uniform Lexicon Based Sorting
Algorithm for Three Prominent {Indo-Aryan} Languages",
journal = j-TALLIP,
volume = "21",
number = "3",
pages = "57:1--57:20",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488371",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Apr 5 06:29:03 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3488371",
abstract = "Three different Indic/Indo-Aryan languages ---
Bengali, Hindi and Nepali have been explored here in
character level to find out similarities and
dissimilarities. Having shared the same root, the
Sanskrit, Indic languages bear common characteristics.
That is \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "57",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Pandey:2022:HAS,
  author          = {Shilpa Pandey and Gaurav Harit},
  title           = {Handwritten Annotation Spotting in Printed
                     Documents Using Top-Down Visual Saliency Models},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {58:1--58:25},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3485468},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3485468},
  abstract        = {In this article, we address the problem of localizing
                     text and symbolic annotations on the scanned image of
                     a printed document. Previous approaches have considered
                     the task of annotation extraction as binary
                     classification into printed and handwritten \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {58},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Kang:2022:ELT,
  author          = {Xiaomian Kang and Yang Zhao and Jiajun Zhang
                     and Chengqing Zong},
  title           = {Enhancing Lexical Translation Consistency
                     for Document-Level Neural Machine Translation},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {59:1--59:21},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3485469},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3485469},
  abstract        = {Document-level neural machine translation (DocNMT)
                     has yielded attractive improvements. In this article,
                     we systematically analyze the discourse phenomena in
                     Chinese-to-English translation, and focus on the most
                     obvious ones, namely lexical translation \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {59},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Azmi:2022:LDR,
  author          = {Aqil M. Azmi and Rehab M. Alnefaie and
                     Hatim A. Aboalsamh},
  title           = {Light Diacritic Restoration to Disambiguate
                     Homographs in Modern {Arabic} Texts},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {60:1--60:14},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3486675},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3486675},
  abstract        = {Diacritic restoration (also known as diacritization
                     or vowelization) is the process of inserting the
                     correct diacritical markings into a text. Modern Arabic
                     is typically written without diacritics, e.g.,
                     newspapers. This lack of diacritical markings often
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {60},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Guo:2022:ACD,
  author          = {Aibo Guo and Xinyi Li and Ning Pang and Xiang Zhao},
  title           = {Adversarial Cross-domain Community Question Retrieval},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {61:1--61:22},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3487291},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3487291},
  abstract        = {Community Q\&A forum is a special type of social
                     media that provides a platform to raise questions and
                     to answer them (both by forum participants), to
                     facilitate online information sharing. Currently,
                     community Q\&A forums in professional domains have
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {61},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Wang:2022:CSS,
  author          = {Shaolei Wang and Zhongyuan Wang and Wanxiang Che
                     and Sendong Zhao and Ting Liu},
  title           = {Combining Self-supervised Learning and Active
                     Learning for Disfluency Detection},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {3},
  pages           = {62:1--62:25},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3487290},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3487290},
  abstract        = {Spoken language is fundamentally different from the
                     written language in that it contains frequent
                     disfluencies or parts of an utterance that are corrected
                     by the speaker. Disfluency detection (removing these
                     disfluencies) is desirable to clean the input
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {62},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
%%% TALLIP 21(3), article 63.  Case protection in the title is applied
%%% to the acronym only, written ({POS}), which matches the later
%%% {POS} in the same title; the parentheses need no protection.
@Article{Warjri:2022:PSP,
author = "Sunita Warjri and Partha Pakray and Saralin A. Lyngdoh
and Arnab Kumar Maji",
title = "Part-of-Speech ({POS}) Tagging Using Deep
Learning-Based Approaches on the Designed {Khasi} {POS}
Corpus",
journal = j-TALLIP,
volume = "21",
number = "3",
pages = "63:1--63:24",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488381",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Tue Apr 5 06:29:03 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3488381",
abstract = "Part-of-speech (POS) tagging is one of the research
challenging fields in natural language processing
(NLP). It requires good knowledge of a particular
language with large amounts of data or corpora for
feature engineering, which can lead to achieving a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "63",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Hastuti:2022:QLS,
  author          = {Rochana Prih Hastuti and Yohanes Suyanto and
                     Anny Kartika Sari},
  title           = {{Q}-Learning for Shift-Reduce Parsing in
                     {Indonesian} Tree-{LSTM}-Based Text Generation},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {64:1--64:15},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3490501},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3490501},
  abstract        = {Tree-LSTM algorithm accommodates tree structure
                     processing to extract information outside the linear
                     sequence pattern. The use of Tree-LSTM in text
                     generation problems requires the help of an external
                     parser at each generation iteration. Developing a
                     good \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {64},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Lee:2022:CEB,
  author          = {Lung-Hao Lee and Jian-Hong Li and Liang-Chih Yu},
  title           = {{Chinese} {EmoBank}: Building Valence-Arousal
                     Resources for Dimensional Sentiment Analysis},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {65:1--65:18},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3489141},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3489141},
  abstract        = {An increasing amount of research has recently
                     focused on dimensional sentiment analysis that
                     represents affective states as continuous numerical
                     values on multiple dimensions, such as valence-arousal
                     (VA) space. Compared to the categorical approach that
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {65},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
%%% TALLIP 21(4), article 66.  `Yi' (the script named in the abstract)
%%% is braced in the title so that styles which convert titles to
%%% sentence case do not lowercase the proper noun.
@Article{Chen:2022:DDG,
author = "Shanxiong Chen and Ye Yang and Xuxin Liu and Shiyu
Zhu",
title = "Dual Discriminator {GAN}: Restoring Ancient {Yi}
Characters",
journal = j-TALLIP,
volume = "21",
number = "4",
pages = "66:1--66:23",
month = jul,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3490031",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed May 18 08:42:14 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3490031",
abstract = "In China, the damage of ancient Yi books are serious.
Due to the lack of ancient Yi experts, the repairation
of ancient Yi books is progressing very slowly. The
artificial intelligence is successful in the field of
image and text, so it is feasible for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "66",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
%%% TALLIP 21(4), article 67.  Each language name in the title is
%%% case-protected on its own; the commas and `and' between them are
%%% left outside the braces because they need no protection.
@Article{Jana:2022:HDL,
author = "Abhik Jana and Gopalakrishnan Venkatesh and Seid Muhie
Yimam and Chris Biemann",
title = "Hypernymy Detection for Low-resource Languages: a
Study for {Hindi}, {Bengali}, and {Amharic}",
journal = j-TALLIP,
volume = "21",
number = "4",
pages = "67:1--67:21",
month = jul,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3490389",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed May 18 08:42:14 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3490389",
abstract = "Numerous attempts for hypernymy relation (e.g., dog
``is-a'' animal) detection have been made for
resourceful languages like English, whereas efforts
made for low-resource languages are scarce primarily
due to lack of gold-standard datasets and suitable
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "67",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
@Article{Mao:2022:LDM,
  author          = {Zhuoyuan Mao and Chenhui Chu and Sadao Kurohashi},
  title           = {Linguistically Driven Multi-Task Pre-Training
                     for Low-Resource Neural Machine Translation},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {68:1--68:29},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3491065},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3491065},
  abstract        = {In the present study, we propose novel
                     sequence-to-sequence pre-training objectives for
                     low-resource machine translation (NMT): Japanese-specific
                     sequence to sequence (JASS) for language pairs
                     involving Japanese as the source or target language,
                     and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {68},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Abderrahim:2022:AWS,
  author          = {Mohammed Alaeddine Abderrahim and
                     Mohammed El-Amine Abderrahim},
  title           = {{Arabic} Word Sense Disambiguation for Information Retrieval},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {69:1--69:19},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3510451},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3510451},
  abstract        = {In the context of using semantic resources for
                     information retrieval, the relationship and distance
                     between concepts are considered important for word sense
                     disambiguation. In this article, we experiment with
                     Conceptual Density and Random Walk with graph
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {69},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Ma:2022:ERC,
  author          = {Hongchao Ma and Zhongqing Wang and Xiabing Zhou
                     and Guodong Zhou and Qinglei Zhou},
  title           = {Emotion Recognition with Conversational Generation Transfer},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {70:1--70:17},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3494532},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3494532},
  abstract        = {Emotion recognition in conversation is one of the
                     essential tasks of natural language processing. However,
                     this task's annotation data is insufficient since such
                     data is hard to collect and annotate. Meanwhile, there
                     is large-scale data for conversational \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {70},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Wu:2022:CEE,
  author          = {Xiaohua Wu and Tengrui Wang and Youping Fan
                     and Fangjian Yu},
  title           = {{Chinese} Event Extraction via Graph Attention Network},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {71:1--71:12},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3494533},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3494533},
  abstract        = {Event extraction plays an important role in natural
                     language processing (NLP) applications, including
                     question answering and information retrieval. Most
                     of the previous state-of-the-art methods were lack
                     of ability in capturing features in long range.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {71},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Cui:2022:IGD,
  author          = {Yiming Cui and Wanxiang Che and Ziqing Yang
                     and Ting Liu and Bing Qin and Shijin Wang and
                     Guoping Hu},
  title           = {Interactive Gated Decoder for Machine Reading Comprehension},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {72:1--72:19},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3501399},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3501399},
  abstract        = {Owing to the availability of various large-scale
                     Machine Reading Comprehension (MRC) datasets, building
                     an effective model to extract passage spans for question
                     answering has been well studied in previous works.
                     However, in reality, there are some \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {72},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Husain:2022:IEP,
  author          = {Fatemah Husain and Ozlem Uzuner},
  title           = {Investigating the Effect of Preprocessing
                     {Arabic} Text on Offensive Language and Hate
                     Speech Detection},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {73:1--73:20},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3501398},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3501398},
  abstract        = {Preprocessing of input text can play a key role
                     in text classification by reducing dimensionality
                     and removing unnecessary content. This study aims to
                     investigate the impact of preprocessing on Arabic
                     offensive language classification. We explore six
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {73},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Gogoi:2022:LLR,
  author          = {Arjun Gogoi and Nomi Baruah},
  title           = {A Lemmatizer for Low-resource Languages: {WSD}
                     and Its Role in the {Assamese} Language},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {74:1--74:22},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3502157},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3502157},
  abstract        = {The morphological variations of highly inflected
                     languages that appear in a text impede the progress
                     of computer processing and root word determination
                     tasks while extracting an abstract. As a remedy to
                     this difficulty, a lemmatization algorithm is
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {74},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
@Article{Harrag:2022:AFN,
  author          = {Fouzi Harrag and Mohamed Khalil Djahli},
  title           = {{Arabic} Fake News Detection: a Fact Checking
                     Based Deep Learning Approach},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {75:1--75:34},
  month           = jul,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3501401},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3501401},
  abstract        = {Fake news stories can polarize society, particularly
                     during political events. They undermine confidence
                     in the media in general. Current NLP systems are still
                     lacking the ability to properly interpret and classify
                     Arabic fake news. Given the high stakes \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {75},
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}
%%% TALLIP 21(4), article 76.  The first author is entered as
%%% `M. P. Jasir' so that BibTeX's First-Last parsing takes Jasir, not
%%% the trailing initial `P.', as the surname.  The citation key is
%%% deliberately left unchanged so that existing citations of this
%%% entry still resolve.
%%% NOTE(review): assumes `M. P.' are initials rather than a family
%%% name; confirm against the publisher's author record.
@Article{P:2022:TSS,
author = "M. P. Jasir and Kannan Balakrishnan",
title = "Text-to-Speech Synthesis: Literature Review with an
Emphasis on {Malayalam} Language",
journal = j-TALLIP,
volume = "21",
number = "4",
pages = "76:1--76:56",
month = jul,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3501397",
ISSN = "2375-4699 (print), 2375-4702 (electronic)",
ISSN-L = "2375-4699",
bibdate = "Wed May 18 08:42:14 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
URL = "https://dl.acm.org/doi/10.1145/3501397",
abstract = "Text-to-Speech Synthesis (TTS) is an active area of
research to generate synthetic speech from underlying
text. The identified syllables are uttered with proper
duration and prosody characteristics to emulate natural
speech. It falls under the category of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
articleno = "76",
fjournal = "ACM Transactions on Asian and Low-Resource Language
Information Processing (TALLIP)",
journal-URL = "https://dl.acm.org/loi/tallip",
}
% TALLIP vol. 21, no. 4 (jul 2022), article 77.
@article{Qin:2022:MDS,
  author          = {Libo Qin and Fuxuan Wei and Minheng Ni and Yue Zhang
                     and Wanxiang Che and Yangming Li and Ting Liu},
  title           = {Multi-domain Spoken Language Understanding Using
                     Domain- and Task-aware Parameterization},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {77:1--77:17},
  articleno       = {77},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3502198},
  url             = {https://dl.acm.org/doi/10.1145/3502198},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Spoken language understanding (SLU) has been addressed
                     as a supervised learning problem, where a set of
                     training data is available for each domain. However,
                     annotating data for a new domain can be both
                     financially costly and non-scalable. One existing
                     \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 78.
@article{Qin:2022:ACE,
  author          = {Yanxia Qin and Zhongqing Wang and Yue Zhang and Kehai
                     Chen and Min Zhang},
  title           = {Advancing {Chinese} Event Detection via Revisiting
                     Character Information},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {78:1--78:9},
  articleno       = {78},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3502197},
  url             = {https://dl.acm.org/doi/10.1145/3502197},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Recently, character information has been successfully
                     introduced into the encoder-decoder event detection
                     model to relieve the trigger-word mismatch problem,
                     thus achieving impressive results in the languages
                     without natural delimiters (i.e., Chinese). \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 79.
@article{Jain:2022:WSD,
  author          = {Goonjan Jain and D. K. Lobiyal},
  title           = {Word Sense Disambiguation using Cooperative Game
                     Theory and Fuzzy {Hindi} {WordNet} based on
                     {ConceptNet}},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {79:1--79:25},
  articleno       = {79},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3502739},
  url             = {https://dl.acm.org/doi/10.1145/3502739},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Natural Language is fuzzy in nature. The fuzziness of
                     Hindi language was captured in the Fuzzy Hindi WordNet
                     (FHWN). FHWN assigned membership values to fuzzy
                     relationships by consulting experts from various
                     domains. However, these membership values need
                     \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 80.
% NOTE(review): the parenthesised transliteration in the abstract looks as if
% its diacritics were lost in conversion -- confirm against the publisher's
% abstract before editing; the text is reproduced unchanged here.
@article{Manerkar:2022:KWC,
  author          = {Sanjana Manerkar and Kavita Asnani and Preeti
                     Ravindranath Khorjuvenkar and Shilpa Desai and Jyoti D.
                     Pawar},
  title           = {{Konkani WordNet}: Corpus-Based Enhancement using
                     Crowdsourcing},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {80:1--80:18},
  articleno       = {80},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3503156},
  url             = {https://dl.acm.org/doi/10.1145/3503156},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Konkani is one of the languages included in the eighth
                     schedule of the Indian constitution. It is the official
                     language of Goa and is spoken mainly in Goa and some
                     places in Karnataka and Kerala. Konkani WordNet or
                     Konkani Shabdamalem (komkani 'sabdamalem) \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 81.
@article{Chen:2022:MMR,
  author          = {Junyi Chen and Lan Du and Ming Liu and Xiabing Zhou},
  title           = {{Mulan}: a Multiple Residual Article-Wise Attention
                     Network for Legal Judgment Prediction},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {81:1--81:15},
  articleno       = {81},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3503157},
  url             = {https://dl.acm.org/doi/10.1145/3503157},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Legal judgment prediction (LJP) is used to predict
                     judgment results based on the description of individual
                     legal cases. In order to be more suitable for actual
                     application scenarios in which the case has cited
                     multiple articles and has multiple charges, \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 82.
% NOTE(review): author given names are recorded as initials only; expand them
% if the publisher record supplies full names.
@article{Guo:2022:HNT,
  author          = {H. Guo and N. Dong and J. Y. Zhao and Y. F. Liu},
  title           = {Handwritten New {Tai Lue} Character Recognition Using
                     Convolutional Prior Features and Deep Variationally
                     Sparse {Gaussian} Process Modeling},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {82:1--82:25},
  articleno       = {82},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3506700},
  url             = {https://dl.acm.org/doi/10.1145/3506700},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {New Tai Lue is widely used in Southwest China and
                     Southeast Asia. Hence, it is important to study related
                     handwritten character recognition. Considering the many
                     similar characters in handwritten New Tai Lue, this
                     paper proposes an offline handwritten New \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 83.
@article{Mahajan:2022:WLS,
  author          = {Shilpa Mahajan and Rajneesh Rani},
  title           = {Word Level Script Identification Using Convolutional
                     Neural Network Enhancement for Scenic Images},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {83:1--83:29},
  articleno       = {83},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3506699},
  url             = {https://dl.acm.org/doi/10.1145/3506699},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Script identification from complex and colorful images
                     is an integral part of the text recognition and
                     classification system. Such images may contain twofold
                     challenges: (1) Challenges related to the camera like
                     blurring effect, non-uniform illumination \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 4 (jul 2022), article 84.
@article{Alshammari:2022:CNS,
  author          = {Nasser O. Alshammari and Fawaz D. Alharbi},
  title           = {Combining a Novel Scoring Approach with {Arabic}
                     Stemming Techniques for {Arabic} Chatbots Conversation
                     Engine},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {4},
  pages           = {84:1--84:21},
  articleno       = {84},
  month           = jul,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3511215},
  url             = {https://dl.acm.org/doi/10.1145/3511215},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Arabic is recognized as one of the main languages
                     around the world. Many attempts and efforts have been
                     done to provide computing solutions to support the
                     language. Developing Arabic chatbots is still an
                     evolving research field and requires extra efforts
                     \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed May 18 08:42:14 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 5 (sep 2022), article 85.
% NOTE(review): the page range ends in "??" -- presumably the final page was
% not known when the entry was recorded; confirm and fill in if available.
@article{Shang:2022:IHD,
  author          = {Rui Shang and Xia Li},
  title           = {Improved Heuristic Data Management and Protection
                     Algorithm for Digital {China} Cultural Datasets},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {5},
  pages           = {85:1--85:??},
  articleno       = {85},
  month           = sep,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3394114},
  url             = {https://dl.acm.org/doi/10.1145/3394114},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In the present scenario sustainable management and
                     protection of digital cultural datasets are considered
                     as a significant area of research. In the recent past,
                     the protection and management of cultural data are
                     facing several new challenges and \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri Mar 17 07:33:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
% TALLIP vol. 21, no. 5 (sep 2022), article 86.
% NOTE(review): the page range ends in "??" -- presumably the final page was
% not known when the entry was recorded; confirm and fill in if available.
@article{Canhasi:2022:AFN,
  author          = {Ercan Canhasi and Rexhep Shijaku and Erblin Berisha},
  title           = {{Albanian} Fake News Detection},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {5},
  pages           = {86:1--86:??},
  articleno       = {86},
  month           = sep,
  year            = {2022},
  doi             = {https://doi.org/10.1145/3487288},
  url             = {https://dl.acm.org/doi/10.1145/3487288},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Recent years have witnessed the vast increase of the
                     phenomenon known as the fake news. Among the main
                     reasons for this increase are the continuous growth of
                     internet and social media usage and the real-time
                     information dissemination opportunity offered
                     \ldots{}},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri Mar 17 07:33:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}
@Article{Ahmed:2022:FCS,
author = "Usman Ahmed and Jerry Chun-Wei Lin and Gautam
Srivastava",
title = "Fuzzy Contrast Set Based Deep Attention Network for
Lexical Analysis and Mental Health Treatment",
journal = j-TALLIP,
volume = "21",
number = "5",
pages = "87:1--87:??",
month = sep,
year = "2022",
CODEN = "????",
DOI = "ht