%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.34",
%%%     date            = "24 September 2024",
%%%     time            = "06:29:30 MDT",
%%%     filename        = "tallip.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "36958 29940 146468 1404623",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on Asian and Low-Resource
%%%                        Language Information Processing (TALLIP);
%%%                        bibliography; BibTeX; TALLIP",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Asian and Low-Resource
%%%                        Language Information Processing (TALLIP)
%%%                        (CODEN none, ISSN 2375-4699 (print),
%%%                        2375-4702 (electronic)).  Publication began
%%%                        with volume 14, number 1, in 2015 as a
%%%                        continuation of the predecessor journal,
%%%                        ACM Transactions on Asian language
%%%                        information processing (TALIP), which is
%%%                        covered in a separate bibliography, talip.bib.
%%%
%%%                        The journal has World Wide Web sites at
%%%
%%%                            https://dl.acm.org/journal/tallip
%%%                            https://dl.acm.org/loi/tallip
%%%
%%%                        At version 1.34, the year coverage looked
%%%                        like this:
%%%
%%%                             2015 (  19)    2019 (  48)    2023 ( 242)
%%%                             2016 (  43)    2020 (  88)    2024 ( 137)
%%%                             2017 (  23)    2021 ( 109)
%%%                             2018 (  27)    2022 ( 130)
%%%
%%%                             Article:        866
%%%
%%%                             Total entries:  866
%%%
%%%                        This bibliography has been constructed
%%%                        primarily from the publisher Web site.
%%%
%%%                        Numerous errors in the sources noted above
%%%                        have been corrected.  Spelling has been
%%%                        verified with the UNIX spell and GNU ispell
%%%                        programs using the exception dictionary
%%%                        stored in the companion file with extension
%%%                        .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen as
%%%                        name:year:abbrev, where name is the family
%%%                        name of the first author or editor, year is a
%%%                        4-digit number, and abbrev is a 3-letter
%%%                        condensation of important title words.
%%%                        Citation labels were automatically generated
%%%                        by software developed for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, with the help of
%%%                        ``bibsort -byvolume''.  The bibsort utility
%%%                        is available from ftp.math.utah.edu in
%%%                        /pub/tex/bib.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
%%% Preamble: raw LaTeX passed through ahead of the bibliography.  It
%%% currently holds only an empty \hyphenation list (no exceptions).
@Preamble{
    "\hyphenation{ }"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
%%% ack-nhfb: postal, telephone, FAX, e-mail, and Web contact details
%%% for Nelson H. F. Beebe; entries cite it in their acknowledgement
%%% field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
%%% j-TALLIP: full journal title followed by its acronym; entries cite
%%% it in their journal field.
@String{j-TALLIP                = "ACM Transactions on Asian and Low-Resource
                                  Language Information Processing (TALLIP)"}

%%% ====================================================================
%%% Bibliography entries:
%%% TALLIP 14(1), article 1, January 2015: inducing wide-coverage
%%% Japanese CCG resources by integrating multiple dependency corpora.
%%% NOTE(review): the pages value appears to mean article 1, first
%%% page 1, final page unknown (??) -- confirm against the maintainer's
%%% conventions.
@Article{Uematsu:2015:IMD,
  author =       "Sumire Uematsu and Takuya Matsuzaki and Hiroki Hanaoka
                 and Yusuke Miyao and Hideki Mima",
  title =        "Integrating Multiple Dependency Corpora for Inducing
                 Wide-Coverage {Japanese} {CCG} Resources",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2658997",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A novel method to induce wide-coverage Combinatory
                 Categorial Grammar (CCG) resources for Japanese is
                 proposed in this article. For some languages including
                 English, the availability of large annotated corpora
                 and the development of data-based induction of
                 lexicalized grammar have enabled deep parsing, i.e.,
                 parsing based on lexicalized grammars. However, deep
                 parsing for Japanese has not been widely studied. This
                 is mainly because most Japanese syntactic resources are
                 represented in chunk-based dependency structures, while
                 previous methods for inducing grammars are dependent on
                 tree corpora. To translate syntactic information
                 presented in chunk-based dependencies to phrase
                 structures as accurately as possible, integration of
                 annotation from multiple dependency-based corpora is
                 proposed. Our method first integrates dependency
                 structures and predicate-argument information and
                 converts them into phrase structure trees. The trees
                 are then transformed into CCG derivations in a similar
                 way to previously proposed methods. The quality of the
                 conversion is empirically evaluated in terms of the
                 coverage of the obtained CCG lexicon and the accuracy
                 of the parsing with the grammar. While the transforming
                 process used in this study is specialized for Japanese,
                 the framework of our method would be applicable to
                 other languages for which dependency-based analysis has
                 been regarded as more appropriate than phrase
                 structure-based analysis due to morphosyntactic
                 features.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(1), article 2, January 2015: rule-based, CRF, SVM, and
%%% voting approaches to recognizing temporal expressions in Hindi.
@Article{Ramrakhiyani:2015:ATE,
  author =       "Nitin Ramrakhiyani and Prasenjit Majumder",
  title =        "Approaches to Temporal Expression Recognition in
                 {Hindi}",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629574",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Temporal annotation of plain text is considered a
                 useful component of modern information retrieval tasks.
                 In this work, different approaches for identification
                 and classification of temporal expressions in Hindi are
                 developed and analyzed. First, a rule-based approach is
                 developed, which takes plain text as input and based on
                 a set of hand-crafted rules, produces a tagged output
                 with identified temporal expressions. This approach
                 performs with a strict F1-measure of 0.83. In another
                 approach, a CRF-based classifier is trained with human
                 tagged data and is then tested on a test dataset. The
                 trained classifier identifies the time expressions from
                 plain text and further classifies them to various
                 classes. This approach performs with a strict
                 F1-measure of 0.78. Next, the CRF is replaced by an
                 SVM-based classifier and the same experiment is
                 performed with the same features. This approach is
                 shown to be comparable to the CRF and performs with a
                 strict F1-measure of 0.77. Using the rule base
                 information as an additional feature enhances the
                 performances to 0.86 and 0.84 for the CRF and SVM
                 respectively. With three different comparable systems
                 performing the extraction task, merging them to take
                 advantage of their positives is the next step. As the
                 first merge experiment, rule-based tagged data is fed
                 to the CRF and SVM classifiers as additional training
                 data. Evaluation results report an increase in
                 F1-measure of the CRF from 0.78 to 0.8. Second, a
                 voting-based approach is implemented, which chooses the
                 best class for each token from the outputs of the three
                 approaches. This approach results in the best
                 performance for this task with a strict F1-measure of
                 0.88. In this process a reusable gold standard dataset
                 for temporal tagging in Hindi is also developed. Named
                 the ILTIMEX2012 corpus, it consists of 300 manually
                 tagged Hindi news documents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(1), article 3, January 2015: CCG supertags used as
%%% features to improve Telugu dependency parsing.
@Article{Kumari:2015:ITD,
  author =       "B. Venkata Seshu Kumari and Ramisetty Rajeshwara Rao",
  title =        "Improving {Telugu} Dependency Parsing using
                 Combinatory Categorial Grammar Supertags",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2693190.2693191",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "We show that Combinatory Categorial Grammar (CCG)
                 supertags can improve Telugu dependency parsing. In
                 this process, we first extract a CCG lexicon from the
                 dependency treebank. Using both the CCG lexicon and the
                 dependency treebank, we create a CCG treebank using a
                 chart parser. Exploring different morphological
                 features of Telugu, we develop a supertagger using
                 maximum entropy models. We provide CCG supertags as
                 features to the Telugu dependency parser (MST parser).
                 We get an improvement of 1.8\% in the unlabelled
                 attachment score and 2.2\% in the labelled attachment
                 score. Our results show that CCG supertags improve the
                 MST parser, especially on verbal arguments for which it
                 has weak rates of recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(1), article 4, January 2015: Thai multi-document
%%% summarization built on Thai elementary discourse units (TEDUs).
@Article{Ketui:2015:EBA,
  author =       "Nongnuch Ketui and Thanaruk Theeramunkong and
                 Chutamanee Onsuwan",
  title =        "An {EDU}-Based Approach for {Thai} Multi-Document
                 Summarization and Its Application",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641567",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Due to lack of a word/phrase/sentence boundary,
                 summarization of Thai multiple documents has several
                 challenges in unit segmentation, unit selection,
                 duplication elimination, and evaluation dataset
                 construction. In this article, we introduce Thai
                 Elementary Discourse Units (TEDUs) and their
                 derivatives, called Combined TEDUs (CTEDUs), and then
                 present our three-stage method of Thai multi-document
                 summarization, that is, unit segmentation, unit-graph
                 formulation, and unit selection and summary generation.
                 To examine performance of our proposed method, a number
                 of experiments are conducted using 50 sets of Thai news
                 articles with their manually constructed reference
                 summaries. Based on measures of ROUGE-1, ROUGE-2, and
                 ROUGE-SU4, the experimental results show that: (1) the
                 TEDU-based summarization outperforms paragraph-based
                 summarization; (2) our proposed graph-based TEDU
                 weighting with importance-based selection achieves the
                 best performance; and (3) unit duplication
                 consideration and weight recalculation help improve
                 summary quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(1), article 5, January 2015: editorial commentary on the
%%% journal's broadened focus.  This entry carries no abstract field.
@Article{Sproat:2015:TPE,
  author =       "Richard Sproat",
  title =        "{TALLIP} Perspectives: Editorial Commentary: The
                 Broadened Focus of the Journal",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2710043",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(2), article 6, March 2015: generating Mandarin-accented
%%% English acoustic models for bilingual speech recognition.
@Article{Shen:2015:MGA,
  author =       "Han-ping Shen and Chung-hsien Wu and Pei-shan Tsai",
  title =        "Model Generation of Accented Speech using Model
                 Transformation and Verification for Bilingual Speech
                 Recognition",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "6:1--6:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661637",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Nowadays, bilingual or multilingual speech recognition
                 is confronted with the accent-related problem caused by
                 non-native speech in a variety of real-world
                 applications. Accent modeling of non-native speech is
                 definitely challenging, because the acoustic properties
                 in highly-accented speech pronounced by non-native
                 speakers are quite divergent. The aim of this study is
                 to generate highly Mandarin-accented English models for
                 speakers whose mother tongue is Mandarin. First, a
                 two-stage, state-based verification method is proposed
                 to extract the state-level, highly-accented speech
                 segments automatically. Acoustic features and
                 articulatory features are successively used for robust
                 verification of the extracted speech segments. Second,
                 Gaussian components of the highly-accented speech
                 models are generated from the corresponding Gaussian
                 components of the native speech models using a linear
                 transformation function. A decision tree is constructed
                 to categorize the transformation functions and used for
                 transformation function retrieval to deal with the data
                 sparseness problem. Third, a discrimination function is
                 further applied to verify the generated accented
                 acoustic models. Finally, the successfully verified
                 accented English models are integrated into the native
                 bilingual phone model set for Mandarin-English
                 bilingual speech recognition. Experimental results show
                 that the proposed approach can effectively alleviate
                 recognition performance degradation due to accents and
                 can obtain absolute improvements of 4.1\%, 1.8\%, and
                 2.7\% in word accuracy for bilingual speech recognition
                 compared to that using traditional ASR approaches,
                 MAP-adapted, and MLLR-adapted ASR methods,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(2), article 7, March 2015: keyword extraction from Arabic
%%% documents using term equivalence classes.
@Article{Awajan:2015:KEA,
  author =       "Arafat Awajan",
  title =        "Keyword Extraction from {Arabic} Documents using Term
                 Equivalence Classes",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2665077",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The rapid growth of the Internet and other computing
                 facilities in recent years has resulted in the creation
                 of a large amount of text in electronic form, which has
                 increased the interest in and importance of different
                 automatic text processing applications, including
                 keyword extraction and term indexing. Although keywords
                 are very useful for many applications, most documents
                 available online are not provided with keywords. We
                 describe a method for extracting keywords from Arabic
                 documents. This method identifies the keywords by
                 combining linguistics and statistical analysis of the
                 text without using prior knowledge from its domain or
                 information from any related corpus. The text is
                 preprocessed to extract the main linguistic
                 information, such as the roots and morphological
                 patterns of derivative words. A cleaning phase is then
                 applied to eliminate the meaningless words from the
                 text. The most frequent terms are clustered into
                 equivalence classes in which the derivative words
                 generated from the same root and the non-derivative
                 words generated from the same stem are placed together,
                 and their count is accumulated. A vector space model is
                 then used to capture the most frequent N-gram in the
                 text. Experiments carried out using a real-world
                 dataset show that the proposed method achieves good
                 results with an average precision of 31\% and average
                 recall of 53\% when tested against manually assigned
                 keywords.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(2), article 8, March 2015: bigram language models and
%%% expert-classifier reevaluation for recognizing online handwritten
%%% Tamil words.
@Article{Sundaram:2015:BLM,
  author =       "Suresh Sundaram and A. G. Ramakrishnan",
  title =        "Bigram Language Models and Reevaluation Strategy for
                 Improved Recognition of Online Handwritten {Tamil}
                 Words",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "8:1--8:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2671014",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article describes a postprocessing strategy for
                 online, handwritten, isolated Tamil words.
                 Contributions have been made with regard to two issues
                 hardly addressed in the online Indic word recognition
                 literature, namely, use of (1) language models
                 exploiting the idiosyncrasies of Indic scripts and (2)
                 expert classifiers for the disambiguation of confused
                 symbols. The input word is first segmented into its
                 individual symbols, which are recognized using a
                 primary support vector machine (SVM) classifier.
                 Thereafter, we enhance the recognition accuracy by
                 utilizing (i) a bigram language model at the symbol or
                 character level and (ii) expert classifiers for
                 reevaluating and disambiguating the different sets of
                 confused symbols. The symbol-level bigram model is used
                 in a traditional Viterbi framework. The concept of a
                 character comprising multiple symbols is unique to
                 Dravidian languages such as Tamil. This multi-symbol
                 feature of Tamil characters has been exploited in
                 proposing a novel, prefix-tree-based character-level
                 bigram model that does not use Viterbi search; rather
                 it reduces the search space for each input symbol based
                 on its left context. For disambiguating confused
                 symbols, a dynamic time-warping approach is proposed to
                 automatically identify the parts of the online trace
                 that discriminates between the confused classes. Fine
                 classification of these regions by dedicated expert
                 SVMs reduces the extent of confusions between such
                 symbols. The integration of segmentation,
                 prefix-tree-based language model and disambiguation of
                 confused symbols is presented on a set of 15,000
                 handwritten isolated online Tamil words. Our results
                 show recognition accuracies of 93.0\% and 81.6\% at the
                 symbol and word level, respectively, as compared to the
                 baseline classifier performance of 88.4\% and 65.1\%,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(2), article 9, March 2015: machine translation in a
%%% semantic vector embedding space using recursive neural networks.
@Article{Zhang:2015:TMT,
  author =       "Jiajun Zhang and Shujie Liu and Mu Li and Ming Zhou
                 and Chengqing Zong",
  title =        "Towards Machine Translation in Semantic Vector Space",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2699927",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Measuring the quality of the translation rules and
                 their composition is an essential issue in the
                 conventional statistical machine translation (SMT)
                 framework. To express the translation quality, the
                 previous lexical and phrasal probabilities are
                 calculated only according to the co-occurrence
                 statistics in the bilingual corpus and may be not
                 reliable due to the data sparseness problem. To address
                 this issue, we propose measuring the quality of the
                 translation rules and their composition in the semantic
                 vector embedding space (VES). We present a recursive
                 neural network (RNN)-based translation framework, which
                 includes two submodels. One is the
                 bilingually-constrained recursive auto-encoder, which
                 is proposed to convert the lexical translation rules
                 into compact real-valued vectors in the semantic VES.
                 The other is a type-dependent recursive neural network,
                 which is proposed to perform the decoding process by
                 minimizing the semantic gap (meaning distance) between
                 the source language string and its translation
                 candidates at each state in a bottom-up structure. The
                 RNN-based translation model is trained using a
                 max-margin objective function that maximizes the margin
                 between the reference translation and the n-best
                 translations in forced decoding. In the experiments, we
                 first show that the proposed vector representations for
                 the translation rules are very reliable for application
                 in translation modeling. We further show that the
                 proposed type-dependent, RNN-based model can
                 significantly improve the translation quality in the
                 large-scale, end-to-end Chinese-to-English translation
                 evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP 14(3), article 10, June 2015: two-stage CRF-based approach
%%% to Korean morpheme segmentation and POS tagging.
@Article{Na:2015:CRF,
  author =       "Seung-Hoon Na",
  title =        "Conditional Random Fields for {Korean} Morpheme
                 Segmentation and {POS} Tagging",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "3",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700051",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "There has been recent interest in statistical
                 approaches to Korean morphological analysis. However,
                 previous studies have been based mostly on generative
                 models, including a hidden Markov model (HMM), without
                 utilizing discriminative models such as a conditional
                 random field (CRF). We present a two-stage
                 discriminative approach based on CRFs for Korean
                 morphological analysis. Similar to methods used for
                 Chinese, we perform two disambiguation procedures based
                 on CRFs: (1) morpheme segmentation and (2) POS tagging.
                 In morpheme segmentation, an input sentence is
                 segmented into sequences of morphemes, where a morpheme
                 unit is either atomic or compound. In the POS tagging
                 procedure, each morpheme (atomic or compound) is
                 assigned a POS tag. Once POS tagging is complete, we
                 carry out a post-processing of the compound morphemes,
                 where each compound morpheme is further decomposed into
                 atomic morphemes, which is based on pre-analyzed
                 patterns and generalized HMMs obtained from the given
                 tagged corpus. Experimental results show the promise of
                 our proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2015:MTM,
  author =       "Xiaodong Liu and Kevin Duh and Yuji Matsumoto",
  title =        "Multilingual Topic Models for Bilingual Dictionary
                 Extraction",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2699939",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A machine-readable bilingual dictionary plays a
                 crucial role in many natural language processing tasks,
                 such as statistical machine translation and
                 cross-language information retrieval. In this article,
                 we propose a framework for extracting a bilingual
                 dictionary from comparable corpora by exploiting a
                 novel combination of topic modeling and word aligners
                 such as the IBM models. Using a multilingual topic
                 model, we first convert a comparable document-aligned
                 corpus into a parallel topic-aligned corpus. This
                 novel topic-aligned corpus is similar in structure to
                 the sentence-aligned corpus frequently employed in
                 statistical machine translation and allows us to
                 extract a bilingual dictionary using a word alignment
                 model. The main advantages of our framework is that (1)
                 no seed dictionary is necessary for bootstrapping the
                 process, and (2) multilingual comparable corpora in
                 more than two languages can also be exploited. In our
                 experiments on a large-scale Wikipedia dataset, we
                 demonstrate that our approach can extract higher
                 precision dictionaries compared to previous approaches
                 and that our method improves further as we add more
                 languages to the dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2015:UMS,
  author = {Xiaoqing Li and Chengqing Zong and Keh-yih Su},
  title = {A Unified Model for Solving the {OOV} Problem of
    {Chinese} Word Segmentation},
  journal = j-TALLIP,
  volume = {14},
  number = {3},
  pages = {12:1--12:??},
  month = jun,
  year = {2015},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2699940},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {This article proposes a unified, character-based,
    generative model to incorporate additional resources
    for solving the out-of-vocabulary (OOV) problem of
    Chinese word segmentation, within which different types
    of additional information can be utilized independently
    in corresponding submodels. This article mainly
    addresses the following three types of OOV: unseen
    dictionary words, named entities, and suffix-derived
    words, none of which are handled well by current
    approaches. The results show that our approach can
    effectively improve the performance of the first two
    types with positive interaction in F-score.
    Additionally, we also analyze reason that suffix
    information is not helpful. After integrating the
    proposed generative model with the corresponding
    discriminative approach, our evaluation on various
    corpora---including SIGHAN-2005, CIPS-SIGHAN-2010, and
    the Chinese Treebank (CTB)---shows that our integrated
    approach achieves the best performance reported in the
    literature on all testing sets when additional
    information and resources are allowed.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {12},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Goto:2015:PUT,
  author = {Isao Goto and Masao Utiyama and Eiichiro Sumita and
    Sadao Kurohashi},
  title = {Preordering using a Target-Language Parser via
    Cross-Language Syntactic Projection for Statistical
    Machine Translation},
  journal = j-TALLIP,
  volume = {14},
  number = {3},
  pages = {13:1--13:??},
  month = jun,
  year = {2015},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2699925},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:49 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {When translating between languages with widely
    different word orders, word reordering can present a
    major challenge. Although some word reordering methods
    do not employ source-language syntactic structures,
    such structures are inherently useful for word
    reordering. However, high-quality syntactic parsers are
    not available for many languages. We propose a
    preordering method using a target-language syntactic
    parser to process source-language syntactic structures
    without a source-language syntactic parser. To train
    our preordering model based on ITG, we produced
    syntactic constituent structures for source-language
    training sentences by (1) parsing target-language
    training sentences, (2) projecting constituent
    structures of the target-language sentences to the
    corresponding source-language sentences, (3) selecting
    parallel sentences with highly synchronized parallel
    structures, (4) producing probabilistic models for
    parsing using the projected partial structures and the
    Pitman-Yor process, and (5) parsing to produce full
    binary syntactic structures maximally synchronized with
    the corresponding target-language syntactic structures,
    using the constraints of the projected partial
    structures and the probabilistic models. Our ITG-based
    preordering model is trained using the produced binary
    syntactic structures and word alignments. The proposed
    method facilitates the learning of ITG by producing
    highly synchronized parallel syntactic structures based
    on cross-language syntactic projection and sentence
    selection. The preordering model jointly parses input
    sentences and identifies their reordered structures.
    Experiments with Japanese--English and Chinese--English
    patent translation indicate that our method outperforms
    existing methods, including string-to-tree syntax-based
    SMT, a preordering method that does not require a
    parser, and a preordering method that uses a
    source-language dependency parser.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {13},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Costa-Jussa:2016:DCS,
  author = {Marta R. Costa-Juss{\`a} and Jordi Centelles},
  title = {Description of the {Chinese}-to-{Spanish} Rule-Based
    Machine Translation System Developed Using a Hybrid
    Combination of Human Annotation and Statistical
    Techniques},
  journal = j-TALLIP,
  volume = {15},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2738045},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Two of the most popular Machine Translation (MT)
    paradigms are rule based (RBMT) and corpus based, which
    include the statistical systems (SMT). When scarce
    parallel corpus is available, RBMT becomes particularly
    attractive. This is the case of the Chinese--Spanish
    language pair. This article presents the first RBMT
    system for Chinese to Spanish. We describe a hybrid
    method for constructing this system taking advantage of
    available resources such as parallel corpora that are
    used to extract dictionaries and lexical and structural
    transfer rules. The final system is freely available
    online and open source. Although performance lags
    behind standard SMT systems for an in-domain test set,
    the results show that the RBMT's coverage is
    competitive and it outperforms the SMT system in an
    out-of-domain test set. This RBMT system is available
    to the general public, it can be further enhanced, and
    it opens up the possibility of creating future hybrid
    MT systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {1},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Khanduja:2016:HFE,
  author = {Deepti Khanduja and Neeta Nain and Subhash Panwar},
  title = {A Hybrid Feature Extraction Algorithm for {Devanagari}
    Script},
  journal = j-TALLIP,
  volume = {15},
  number = {1},
  pages = {2:1--2:??},
  month = jan,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2710018},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {The efficiency of any character recognition technique
    is directly dependent on the accuracy of the generated
    feature set that could uniquely represent a character
    and hence correctly recognize it. This article proposes
    a hybrid approach combining the structural features of
    the character and a mathematical model of curve fitting
    to simulate the best features of a character. As a
    preprocessing step, skeletonization of the character is
    performed using an iterative thinning algorithm based
    on Raster scan of the character image. Then, a
    combination of structural features of the character
    like number of endpoints, loops, and intersection
    points is calculated. Further, the thinned character
    image is statistically zoned into partitions, and a
    quadratic curve-fitting model is applied on each
    partition forming a feature vector of the coefficients
    of the optimally fitted curve. This vector is combined
    with the spatial distribution of the foreground pixels
    for each zone and hence script-independent feature
    representation. The approach has been evaluated
    experimentally on Devanagari scripts. The algorithm
    achieves an average recognition accuracy of 93.4\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {2},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Shatnawi:2016:IHA,
  author = {Maad Shatnawi and Sherief Abdallah},
  title = {Improving Handwritten {Arabic} Character Recognition
    by Modeling Human Handwriting Distortions},
  journal = j-TALLIP,
  volume = {15},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2764456},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Handwritten Arabic character recognition systems face
    several challenges, including the unlimited variation
    in human handwriting and the unavailability of large
    public databases of handwritten characters and words.
    The use of synthetic data for training and testing
    handwritten character recognition systems is one of the
    possible solutions to provide several variations for
    these characters and to overcome the lack of large
    databases. While this can be using arbitrary
    distortions, such as image noise and randomized affine
    transformations, such distortions are not realistic. In
    this work, we model real distortions in handwriting
    using real handwritten Arabic character examples and
    then use these distortion models to synthesize
    handwritten examples that are more realistic. We show
    that the use of our proposed approach leads to
    significant improvements across different
    machine-learning classification algorithms.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {3},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Wushouer:2016:CAP,
  author = {Mairidan Wushouer and Donghui Lin and Toru Ishida and
    Katsutoshi Hirayama},
  title = {A Constraint Approach to Pivot-Based Bilingual
    Dictionary Induction},
  journal = j-TALLIP,
  volume = {15},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2723144},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {High-quality bilingual dictionaries are very useful,
    but such resources are rarely available for
    lower-density language pairs, especially for those that
    are closely related. Using a third language to link two
    other languages is a well-known solution and usually
    requires only two input bilingual dictionaries A-B and
    B-C to automatically induce the new one, A-C. This
    approach, however, has never been demonstrated to
    utilize the complete structures of the input bilingual
    dictionaries, and this is a key failing because the
    dropped meanings negatively influence the result. This
    article proposes a constraint approach to pivot-based
    dictionary induction where language A and C are closely
    related. We create constraints from language similarity
    and model the structures of the input dictionaries as a
    Boolean optimization problem, which is then formulated
    within the Weighted Partial Max-SAT framework, an
    extension of Boolean Satisfiability (SAT). All of the
    encoded CNF (Conjunctive Normal Form), the predominant
    input language of modern SAT/MAX-SAT solvers, formulas
    are evaluated by a solver to produce the target
    (output) bilingual dictionary. Moreover, we discuss
    alternative formalizations as a comparison study. We
    designed a tool that uses the Sat4j library as the
    default solver to implement our method and conducted an
    experiment in which the output bilingual dictionary
    achieved better quality than the baseline method.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {4},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Yeh:2016:SAI,
  author = {Jui-Feng Yeh},
  title = {Speech Act Identification Using Semantic Dependency
    Graphs with Probabilistic Context-Free Grammars},
  journal = j-TALLIP,
  volume = {15},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2786978},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {We propose an approach for identifying the speech acts
    of speakers' utterances in conversational spoken
    dialogue that involves using semantic dependency graphs
    with probabilistic context-free grammars (PCFGs). The
    semantic dependency graph based on the HowNet knowledge
    base is adopted to model the relationships between
    words in an utterance parsed by PCFG. Dependency
    relationships between words within the utterance are
    extracted by decomposing the semantic dependency graph
    according to predefined events. The corresponding
    values of semantic slots are subsequently extracted
    from the speaker's utterances according to the
    corresponding identified speech act. The experimental
    results obtained when using the proposed approach
    indicated that the accuracy rates of speech act
    detection and task completion were 95.6\% and 77.4\%
    for human-generated transcription (REF) and
    speech-to-text recognition output (STT), respectively,
    and the average numbers of turns of each dialogue were
    8.3 and 11.8 for REF and STT, respectively. Compared
    with Bayes classifier, partial pattern tree, and
    Bayesian-network-based approaches, we obtained 14.1\%,
    9.2\%, and 3\% improvements in the accuracy of speech
    act identification, respectively.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {5},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2016:CCSa,
  author = {Ting-Xuan Wang and Wen-Hsiang Lu},
  title = {Constructing Complex Search Tasks with Coherent
    Subtask Search Goals},
  journal = j-TALLIP,
  volume = {15},
  number = {2},
  pages = {6:1--6:??},
  month = feb,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2742547},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Nowadays, due to the explosive growth of web content
    and usage, users deal with their complex search tasks
    by web search engines. However, conventional search
    engines consider a search query corresponding only to a
    simple search task. In order to accomplish a complex
    search task, which consists of multiple subtask search
    goals, users usually have to issue a series of queries.
    For example, the complex search task ``travel to
    Dubai'' may involve several subtask search goals,
    including reserving hotel room, surveying Dubai
    landmarks, booking flights, and so forth. Therefore, a
    user can efficiently accomplish his or her complex
    search task if search engines can predict the complex
    search task with a variety of subtask search goals. In
    this work, we propose a complex search task model
    (CSTM) to deal with this problem. The CSTM first groups
    queries into complex search task clusters, and then
    generates subtask search goals from each complex search
    task cluster. To raise the performance of CSTM, we
    exploit four web resources including community question
    answering, query logs, search engine result pages, and
    clicked pages. Experimental results show that our CSTM
    is effective in identifying the comprehensive subtask
    search goals of a complex search task.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {6},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Tsai:2016:CWB,
  author = {Richard Tzong-Han Tsai},
  title = {Collective {Web}-Based Parenthetical Translation
    Extraction Using {Markov} Logic Networks},
  journal = j-TALLIP,
  volume = {15},
  number = {2},
  pages = {7:1--7:??},
  month = feb,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2794399},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {Parenthetical translations are translations of terms
    in otherwise monolingual text that appear inside
    parentheses. Parenthetical translations extraction
    (PTE) is the task of extracting parenthetical
    translations from natural language documents. One of
    the main difficulties in PTE is to detect the left
    boundary of the translated term in preparenthetical
    text. In this article, we propose a collective approach
    that employs Markov logic to model multiple constraints
    used in the PTE task. We show how various constraints
    can be formulated and combined in a Markov logic
    network (MLN). Our experimental results show that the
    proposed collective PTE approach significantly
    outperforms a current state-of-the-art method,
    improving the average F-measure up to 27.11\% compared
    to the previous word alignment approach. It also
    outperforms an individual MLN-based system by 8.2\% and
    a system based on conditional random fields by 5.9\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {7},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Jain:2016:FHW,
  author = {Amita Jain and D. K. Lobiyal},
  title = {Fuzzy {Hindi} {WordNet} and Word Sense Disambiguation
    Using Fuzzy Graph Connectivity Measures},
  journal = j-TALLIP,
  volume = {15},
  number = {2},
  pages = {8:1--8:??},
  month = feb,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2790079},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {In this article, we propose Fuzzy Hindi WordNet, which
    is an extended version of Hindi WordNet. The proposed
    idea of fuzzy relations and their role in modeling
    Fuzzy Hindi WordNet is explained. We mathematically
    define fuzzy relations and the composition of these
    fuzzy relations for this extended version. We show that
    the concept of composition of fuzzy relations can be
    used to infer a relation between two words that
    otherwise are not directly related in Hindi WordNet.
    Then we propose fuzzy graph connectivity measures that
    include both local and global measures. These measures
    are used in determining the significance of a concept
    (which is represented as a vertex in the fuzzy graph)
    in a specific context. Finally, we show how these
    extended measures solve the problem of word sense
    disambiguation (WSD) effectively, which is useful in
    many natural language processing applications to
    improve their performance. Experiments on standard
    sense tagged corpus for WSD show better results when
    Fuzzy Hindi WordNet is used in place of Hindi
    WordNet.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {8},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Kertkeidkachorn:2016:AFH,
  author = {Natthawut Kertkeidkachorn and Proadpran Punyabukkana
    and Atiwong Suchato},
  title = {Acoustic Features for Hidden Conditional Random
    Fields-Based {Thai} Tone Classification},
  journal = j-TALLIP,
  volume = {15},
  number = {2},
  pages = {9:1--9:??},
  month = feb,
  year = {2016},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2833088},
  ISSN = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L = {2375-4699},
  bibdate = {Mon Apr 3 08:15:50 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract = {In the Thai language, tone information is necessary
    for Thai speech recognition systems. Previous studies
    show that many acoustic cues are attributed to shapes
    of tones. Nevertheless, most Thai tone classification
    studies mainly adopted F$_0$ values and their
    derivatives without considering other acoustic
    features. In this article, other acoustic features for
    Thai tone classification are investigated. In the
    experiment, energy values and spectral information
    represented by three spectral-based features including
    the LPC-based feature, PLP-based feature, and
    MFCC-based feature are applied to the HCRF-based Thai
    tone classification, which was reported as the best
    approach for Thai tone classification. The energy
    values provide an error rate reduction of 22.40\% in
    the isolated word scenario, while there are slight
    improvements in the continuous speech scenario. On the
    contrary, spectral-based features greatly contribute to
    Thai tone classification in the continuous-speech
    scenario, whereas spectral-based features slightly
    degrade performances in the isolated-word scenario. The
    best achievement in the continuous-speech scenario is
    obtained from the PLP-based feature, which yields an
    error rate reduction of 13.90\%. Therefore, findings in
    this article are that energy values and spectral-based
    features, especially the PLP-based feature, are the
    main contributors to the improvement of the
    performances of Thai tone classification in the
    isolated-word scenario and the continuous-speech
    scenario, respectively.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno = {9},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing},
  journal-URL = {https://dl.acm.org/loi/tallip},
}

@Article{Chu:2016:IPS,
  author =       "Chenhui Chu and Toshiaki Nakazawa and Sadao
                 Kurohashi",
  title =        "Integrated Parallel Sentence and Fragment Extraction
                 from Comparable Corpora: a Case Study on
                 {Chinese--Japanese} {Wikipedia}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2833089",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Parallel corpora are crucial for statistical machine
                 translation (SMT); however, they are quite scarce for
                 most language pairs and domains. As comparable corpora
                 are far more available, many studies have been
                 conducted to extract either parallel sentences or
                 fragments from them for SMT. In this article, we
                 propose an integrated system to extract both parallel
                 sentences and fragments from comparable corpora. We
                 first apply parallel sentence extraction to identify
                 parallel sentences from comparable sentences. We then
                 extract parallel fragments from the comparable
                 sentences. Parallel sentence extraction is based on a
                 parallel sentence candidate filter and classifier for
                 parallel sentence identification. We improve it by
                 proposing a novel filtering strategy and three novel
                 feature sets for classification. Previous studies have
                 found it difficult to accurately extract parallel
                 fragments from comparable sentences. We propose an
                 accurate parallel fragment extraction method that uses
                 an alignment model to locate the parallel fragment
                 candidates and an accurate lexicon-based filter to
                 identify the truly parallel fragments. A case study on
                 the Chinese--Japanese Wikipedia indicates that our
                 proposed methods outperform previously proposed
                 methods, and the parallel data extracted by our system
                 significantly improves SMT performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2016:CCSb,
  author =       "Rui Wang and Masao Utiyama and Isao Goto and Eiichiro
                 Sumita and Hai Zhao and Bao-Liang Lu",
  title =        "Converting Continuous-Space Language Models into
                 {$N$}-gram Language Models with Efficient Bilingual
                 Pruning for Statistical Machine Translation",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "11:1--11:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2843942",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The Language Model (LM) is an essential component of
                 Statistical Machine Translation (SMT). In this article,
                 we focus on developing efficient methods for LM
                 construction. Our main contribution is that we propose
                 a Natural $N$-grams based Converting (NNGC) method for
                 transforming a Continuous-Space Language Model (CSLM)
                 to a Back-off $N$-gram Language Model (BNLM).
                 Furthermore, a Bilingual LM Pruning (BLMP) approach is
                 developed for enhancing LMs in SMT decoding and
                 speeding up CSLM converting. The proposed pruning and
                 converting methods can convert a large LM efficiently
                 by working jointly. That is, a LM can be effectively
                 pruned before it is converted from CSLM without
                 sacrificing performance, and further improved if an
                 additional corpus contains out-of-domain information.
                 For different SMT tasks, our experimental results
                 indicate that the proposed NNGC and BLMP methods
                 outperform the existing counterpart approaches
                 significantly in BLEU and computational cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chakrabarty:2016:BBL,
  author =       "Abhisek Chakrabarty and Utpal Garain",
  title =        "{BenLem} (A {Bengali} Lemmatizer) and Its Role in
                 {WSD}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2835494",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A lemmatization algorithm for Bengali has been
                 developed and evaluated. Its effectiveness for word
                 sense disambiguation (WSD) is also investigated. One of
                 the key challenges for computer processing of highly
                 inflected languages is to deal with the frequent
                 morphological variations of the root words appearing in
                 the text. Therefore, a lemmatizer is essential for
                 developing natural language processing (NLP) tools for
                 such languages. In this experiment, Bengali, which is
                 the national language of Bangladesh and the second most
                 popular language in the Indian subcontinent, has been
                 taken as a reference. In order to design the Bengali
                 lemmatizer (named as BenLem), possible transformations
                 through which surface words are formed from lemmas are
                 studied so that appropriate reverse transformations can
                 be applied on a surface word to get the corresponding
                 lemma back. BenLem is found to be capable of handling
                 both inflectional and derivational morphology in
                 Bengali. It is evaluated on a set of 18 news articles
                 taken from the FIRE Bengali News Corpus consisting of
                 3,342 surface words (excluding proper nouns) and found
                 to be 81.95\% accurate. The role of the lemmatizer is
                 then investigated for Bengali WSD. Ten highly
                 polysemous Bengali words are considered for sense
                 disambiguation. The FIRE corpus and a collection of
                 Tagore's short stories are considered for creating the
                 WSD dataset. Different WSD systems are considered for
                 this experiment, and it is noticed that BenLem improves
                 the performance of all the WSD systems and the
                 improvements are statistically significant.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zhou:2016:ESR,
  author =       "Hao Zhou and Shujian Huang and Junsheng Zhou and Yue
                 Zhang and Huadong Chen and Xinyu Dai and Chuan Cheng
                 and Jiajun Chen",
  title =        "Enhancing Shift--Reduce Constituent Parsing with
                 Action {$N$}-Gram Model",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820902",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Current shift-reduce parsers ``understand'' the
                 context by embodying a large number of binary indicator
                 features with a discriminative model. In this article,
                 we propose the action n-gram model, which utilizes the
                 action sequence to help parsing disambiguation. The
                 action n-gram model is trained on action sequences
                 produced by parsers with the n-gram estimation method,
                 which gives a smoothed maximum likelihood estimation of
                 the action probability given a specific action history.
                 We show that incorporating action n-gram models into a
                 state-of-the-art parsing framework could achieve
                 parsing accuracy improvements on three datasets across
                 two languages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sadek:2016:EAC,
  author =       "Jawad Sadek and Farid Meziane",
  title =        "Extracting {Arabic} Causal Relations Using Linguistic
                 Patterns",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2800786",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Identifying semantic relations is a crucial step in
                 discourse analysis and is useful for many applications
                 in both language and speech technology. Automatic
                 detection of Causal relations therefore has gained
                 popularity in the literature within different
                 frameworks. The aim of this article is the automatic
                 detection and extraction of Causal relations that are
                 explicitly expressed in Arabic texts. To fulfill this
                 goal, a Pattern Recognizer model was developed to
                 signal the presence of cause--effect information within
                 sentences from nonspecific domain texts. This model
                 incorporates approximately 700 linguistic patterns so
                 that parts of the sentence representing the cause and
                 those representing the effect can be distinguished. The
                 patterns were constructed based on different sets of
                 syntactic features by analyzing a large untagged Arabic
                 corpus. In addition, the model was boosted with three
                 independent algorithms to deal with certain types of
                 grammatical particles that indicate causation. With
                 this approach, the proposed model achieved an overall
                 recall of 81\% and a precision of 78\%. Evaluation
                 results revealed that the justification particles play
                 a key role in detecting Causal relations. To the best
                 of our knowledge, no previous studies have been
                 dedicated to dealing with this type of relation in the
                 Arabic language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yang:2016:BSR,
  author =       "Haitong Yang and Yu Zhou and Chengqing Zong",
  title =        "Bilingual Semantic Role Labeling Inference via Dual
                 Decomposition",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2835493",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article focuses on bilingual Semantic Role
                 Labeling (SRL); its goal is to annotate semantic roles
                 on both sides of the parallel bilingual texts
                 (bi-texts). Since rich bilingual information is
                 encoded, bilingual SRL has been applied in many
                 natural-language processing (NLP) tasks such as machine
                 translation (MT), cross-lingual information retrieval
                 (IR), and the like. A feasible way of performing
                 bilingual SRL is using monolingual SRL systems to
                 perform SRL on each side of bi-texts separately.
                 However, it is difficult to obtain consistent SRL
                 results on both sides of bi-texts in this way. Some
                 works have tried to jointly infer bilingual SRL because
                 there are many complementary language cues on both
                 sides of bi-texts and they reported better performance
                 than monolingual systems. However, there are two limits
                 in the existing methods. First, the existing methods
                 often require high inference costs due to the complex
                 objective function. Second, the existing methods fully
                 adopt the candidates generated by monolingual SRL
                 systems, but many candidates are discarded in the
                 argument pruning or identification stage of monolingual
                 systems. In this article, we propose two strategies to
                 overcome these limits. We utilize a simple but
                 efficient technique: Dual Decomposition to search for
                 consistent results for both sides of bi-texts. On the
                 other hand, we propose a method called Bi-Directional
                 Projection (BDP) to recover arguments discarded in
                 monolingual SRL systems. We evaluate our method on a
                 standard parallel benchmark: the OntoNotes dataset. The
                 experimental results show that our method yields
                 significant improvements over the state-of-the-art
                 monolingual systems. In addition, our approach is also
                 better and faster than existing methods due to BDP and
                 Dual Decomposition.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2016:MMC,
  author =       "Maoxi Li and Mingwen Wang and Hanxi Li and Fan Xu",
  title =        "Modeling Monolingual Character Alignment for Automatic
                 Evaluation of {Chinese} Translation",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2815619",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Automatic evaluation of machine translations is an
                 important task. Most existing evaluation metrics rely
                 on matching the same word or letter $n$-grams. This
                 strategy leads to poor results on Chinese translations
                 because one has to rely merely on matching identical
                 characters. In this article, we propose a new
                 evaluation metric that allows different characters with
                 the same or similar meaning to match. An Indirect
                 Hidden Markov Model (IHMM) is proposed to align the
                 Chinese translation with human references at the
                 character level. In the model, the emission
                 probabilities are estimated by character similarity,
                 including character semantic similarity and character
                 surface similarity, and transition probabilities are
                 estimated by a heuristic distance-based distortion
                 model. When evaluating the submitted output of
                 English-to-Chinese translation systems in the IWSLT'08
                 CT-EC and NIST'08 EC tasks, the experimental results
                 indicate that the proposed metric has a significantly
                 better correlation with human evaluation than the
                 state-of-the-art machine translation metrics (i.e.,
                 BLEU, Meteor Universal, and TESLA-CELAB). This study
                 shows that it is important to allow different
                 characters to match in the evaluation of Chinese
                 translations and that the IHMM is a reasonable approach
                 for the alignment of Chinese characters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Abuaiadah:2016:UBM,
  author =       "Diab Abuaiadah",
  title =        "Using Bisect {$K$}-Means Clustering Technique in the
                 Analysis of {Arabic} Documents",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2812809",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, I have investigated the performance
                 of the bisect K-means clustering algorithm compared to
                 the standard K-means algorithm in the analysis of
                 Arabic documents. The experiments included five
                 commonly used similarity and distance functions
                 (Pearson correlation coefficient, cosine, Jaccard
                 coefficient, Euclidean distance, and averaged
                 Kullback--Leibler divergence) and three leading
                 stemmers. Using the purity measure, the bisect K-means
                 clearly outperformed the standard K-means in all
                 settings with varying margins. For the bisect K-means,
                 the best purity reached 0.927 when using the Pearson
                 correlation coefficient function, while for the
                 standard K-means, the best purity reached 0.884 when
                 using the Jaccard coefficient function. Removing stop
                 words significantly improved the results of the bisect
                 K-means but produced minor improvements in the results
                 of the standard K-means. Stemming provided additional
                 minor improvement in all settings except the
                 combination of the averaged Kullback--Leibler
                 divergence function and the root-based stemmer, where
                 the purity was deteriorated by more than 10\%. These
                 experiments were conducted using a dataset with nine
                 categories, each of which contains 300 documents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Elayeb:2016:ACL,
  author =       "Bilel Elayeb and Ibrahim Bounhas",
  title =        "{Arabic} Cross-Language Information Retrieval: a
                 Review",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2789210",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Cross-language information retrieval (CLIR) deals with
                 retrieving relevant documents in one language using
                 queries expressed in another language. As CLIR tools
                 rely on translation techniques, they are challenged by
                 the properties of highly derivational and flexional
                 languages like Arabic. Much work has been done on CLIR
                 for different languages including Arabic. In this
                 article, we introduce the reader to the motivations for
                 solving some problems related to Arabic CLIR
                 approaches. The evaluation of these approaches is
                 discussed starting from the 2001 and 2002 TREC Arabic
                 CLIR tracks, which aim to objectively evaluate CLIR
                 systems. We also study many other research works to
                 highlight the unresolved problems or those that require
                 further investigation. These works are discussed in the
                 light of a deep study of the specificities and the
                 tasks of Arabic information retrieval (IR). Particular
                 attention is given to translation techniques and CLIR
                 resources, which are key issues challenging Arabic
                 CLIR. To push research in this field, we discuss how a
                 new standard collection can improve Arabic IR and CLIR
                 tracks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zhao:2016:ALM,
  author =       "Yinggong Zhao and Shujian Huang and Xin-Yu Dai and
                 Jiajun Chen",
  title =        "Adaptation of Language Models for {SMT} Using Neural
                 Networks with Topic Information",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2816816",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Neural network language models (LMs) are shown to be
                 effective in improving the performance of statistical
                 machine translation (SMT) systems. However,
                 state-of-the-art neural network LMs usually use words
                 before the current position as context and neglect
                 global topic information, which can help machine
                 translation (MT) systems to select better translation
                 candidates from a higher perspective. In this work, we
                 propose improvement of the state-of-the-art feedforward
                 neural language model with topic information. Two main
                 issues need to be tackled when adding topics into
                 neural network LMs for SMT: one is how to incorporate
                 topics to the neural network; the other is how to get
                 target-side topic distribution before translation. We
                 incorporate topics by appending topic distribution to
                 the input layer of a feedforward LM. We adopt a
                 multinomial logistic-regression (MLR) model to predict
                 the target-side topic distribution based on source side
                 information. Moreover, we propose a feedforward neural
                 network model to learn joint representations on the
                 source side for topic prediction. LM experiments
                 demonstrate that the perplexity on validation set can
                 be greatly reduced by the topic-enhanced feedforward
                 LM, and the prediction of target-side topics can be
                 improved dramatically with the MLR model equipped with
                 the joint source representations. A final MT
                 experiment, conducted on a large-scale Chinese--English
                 dataset, shows that our feedforward LM with predicted
                 topics improves the translation performance against a
                 strong baseline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ding:2016:IIE,
  author =       "Chenchen Ding and Keisuke Sakanushi and Hirona Touji
                 and Mikio Yamamoto",
  title =        "Inter-, Intra-, and Extra-Chunk Pre-Ordering for
                 Statistical {Japanese}-to-{English} Machine
                 Translation",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "3",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2818381",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A rule-based pre-ordering approach is proposed for
                 statistical Japanese-to-English machine translation
                 using the dependency structure of source-side
                 sentences. A Japanese sentence is pre-ordered to an
                 English-like order at the morpheme level for a
                 statistical machine translation system during the
                 training and decoding phase to resolve the reordering
                 problem. In this article, extra-chunk pre-ordering of
                 morphemes is proposed, which allows Japanese functional
                 morphemes to move across chunk boundaries. This
                 contrasts with the intra-chunk reordering used in
                 previous approaches, which restricts the reordering of
                 morphemes within a chunk. Linguistically oriented
                 discussions show that correct pre-ordering cannot be
                 realized without extra-chunk movement of morphemes. The
                 proposed approach is compared with five rule-based
                 pre-ordering approaches designed for
                 Japanese-to-English translation and with a language
                 independent statistical pre-ordering approach on a
                 standard patent dataset and on a news dataset obtained
                 by crawling Internet news sites. Two state-of-the-art
                 statistical machine translation systems, one
                 phrase-based and the other hierarchical phrase-based,
                 are used in experiments. Experimental results show that
                 the proposed approach outperforms the compared
                 approaches on automatic reordering measures (Kendall's
                 $ \tau $, Spearman's $ \rho $, fuzzy reordering score,
                 and test set RIBES) and on the automatic translation
                 precision measure of test set BLEU score.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Lee:2015:ISI,
  author =       "Lung-Hao Lee and Gina-Anne Levow and Shih-Hung Wu and
                 Chao-Lin Liu",
  title =        "Introduction to the Special Issue on {Chinese} Spell
                 Checking",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2818354",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  abstract =     "This special issue contains four articles based on and
                 expanded from systems presented at the SIGHAN-7 Chinese
                 Spelling Check Bakeoff. We provide an overview of the
                 approaches and designs for Chinese spelling checkers
                 presented in these articles. We conclude this
                 introductory article with a summary of possible future
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Special-issue article (TALLIP volume 14, number 4, October 2015,
%%% article 15) on a probabilistic framework for Chinese spelling check.
%%% The bibsource field shows that this record is shared with spell.bib.
@Article{Chen:2015:PFC,
  author =       "Kuan-Yu Chen and Hsin-Min Wang and Hsin-Hsi Chen",
  title =        "A Probabilistic Framework for {Chinese} Spelling
                 Check",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "15:1--15:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2826234",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  abstract =     "Chinese spelling check (CSC) is still an unsolved
                 problem today since there are many homonymous or
                 homomorphous characters. Recently, more and more CSC
                 systems have been proposed. To the best of our
                 knowledge, language modeling is one of the major
                 components among these systems because of its
                 simplicity and moderately good predictive power. After
                 deeply analyzing the school of research, we are aware
                 that most of the systems only employ the conventional
                 $n$-gram language models. The contributions of this
                 article are threefold. First, we propose a novel
                 probabilistic framework for CSC, which naturally
                 combines several important components, such as the
                 substitution model and the language model, to inherit
                 their individual merits as well as to overcome their
                 limitations. Second, we incorporate the topic language
                 models into the CSC system in an unsupervised fashion.
                 The topic language models can capture the long-span
                 semantic information from a word (character) string
                 while the conventional $n$-gram language models can only
                 preserve the local regularity information. Third, we
                 further integrate Web resources with the proposed
                 framework to enhance the overall performance. Our
                 rigorously empirical experiments demonstrate the
                 consistent and utility performance of the proposed
                 framework in the CSC task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Special-issue article (TALLIP volume 14, number 4, October 2015,
%%% article 16) on a hybrid candidate-generation and ranking approach to
%%% Chinese spelling check.  The bibsource field shows that this record
%%% is shared with spell.bib.
@Article{Liu:2015:HRA,
  author =       "Xiaodong Liu and Fei Cheng and Kevin Duh and Yuji
                 Matsumoto",
  title =        "A Hybrid Ranking Approach to {Chinese} Spelling
                 Check",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "16:1--16:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2822264",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  abstract =     "We propose a novel framework for Chinese Spelling
                 Check (CSC), which is an automatic algorithm to detect
                 and correct Chinese spelling errors. Our framework
                 contains two key components: candidate generation and
                 candidate ranking. Our framework differs from previous
                 research, such as Statistical Machine Translation (SMT)
                 based model or Language Model (LM) based model, in that
                 we use both SMT and LM models as components of our
                 framework for generating the correction candidates, in
                 order to obtain maximum recall; to improve the
                 precision, we further employ a Support Vector Machines
                 (SVM) classifier to rank the candidates generated by
                 the SMT and the LM. Experiments show that our framework
                 outperforms other systems, which adopted the same or
                 similar resources as ours in the SIGHAN 7 shared task;
                 even comparing with the state-of-the-art systems, which
                 used more resources, such as a considerable large
                 dictionary, an idiom dictionary and other semantic
                 information, our framework still obtains competitive
                 results. Furthermore, to address the resource
                 scarceness problem for training the SMT model, we
                 generate around 2 million artificial training sentences
                 using the Chinese character confusion sets, which
                 include a set of Chinese characters with similar shapes
                 and similar pronunciations, provided by the SIGHAN 7
                 shared task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Special-issue article (TALLIP volume 14, number 4, October 2015,
%%% article 17) on a Chinese spelling checker built on an inverted index
%%% list with rescoring.  The bibsource field shows that this record is
%%% shared with spell.bib.
@Article{Yeh:2015:CSC,
  author =       "Jui-Feng Yeh and Wen-Yi Chen and Mao-Chuan Su",
  title =        "{Chinese} Spelling Checker Based on an Inverted Index
                 List with a Rescoring Mechanism",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "17:1--17:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2826235",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  abstract =     "An approach is proposed for Chinese spelling error
                 detection and correction, in which an inverted index
                 list with a rescoring mechanism is used. The inverted
                 index list is a structure for mapping from word to
                 desired sentence, and for representing nodes in
                 lattices constructed through character expansion
                 (according to predefined phonologically and visually
                 similar character sets). Pruning based on a contextual
                 dependency confidence measure was used to markedly
                 reduce the search space and computational complexity.
                 Relevant mapping relations between the original input
                 and desired input were obtained using a scoring
                 mechanism composed of class-based language and maximum
                 entropy correction models containing character, word,
                 and contextual features. The proposed method was
                 evaluated using data sets provided by SigHan 7 bakeoff.
                 The experimental results show that the proposed method
                 achieved acceptable performance in terms of recall rate
                 or precision rate in error sentence detection and error
                 location detection, and it outperformed other
                 approaches in error location detection and
                 correction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Special-issue article (TALLIP volume 14, number 4, October 2015,
%%% article 18) on correcting Chinese spelling errors with word lattice
%%% decoding.  The bibsource field shows that this record is shared with
%%% spell.bib.
@Article{Hsieh:2015:CCS,
  author =       "Yu-Ming Hsieh and Ming-Hong Bai and Shu-Ling Huang and
                 Keh-Jiann Chen",
  title =        "Correcting {Chinese} Spelling Errors with Word Lattice
                 Decoding",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2791389",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  abstract =     "Chinese spell checkers are more difficult to develop
                 because of two language features: (1) there are no word
                 boundaries, and a character may function as a word or a
                 word morpheme; and (2) the Chinese character set
                 contains more than ten thousand characters. The former
                 makes it difficult for a spell checker to detect
                 spelling errors, and the latter makes it difficult for
                 a spell checker to construct error models. We develop a
                 word lattice decoding model for a Chinese spell checker
                 that addresses these difficulties. The model performs
                 word segmentation and error correction simultaneously,
                 thereby solving the word boundary problem. The model
                 corrects nonword errors as well as real-word errors. In
                 order to better estimate the error distribution of
                 large character sets for error models, we also propose
                 a methodology to extract spelling error samples
                 automatically from the Google web 1T corpus. Due to the
                 large quantity of data in the Google web 1T corpus,
                 many spelling error samples can be extracted, better
                 reflecting spelling error distributions in the real
                 world. Finally, in order to improve the spell checker
                 for real applications, we produce $n$-best suggestions
                 for spelling error corrections. We test our proposed
                 approach with the Bakeoff 2013 CSC Datasets; the
                 results show that the proposed methods with the error
                 model significantly outperform the performance of
                 Chinese spell checkers that do not use error models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Editorial commentary on the state of the journal (TALLIP volume 14,
%%% number 4, October 2015, article 19).  The author field holds the
%%% placeholder "Anonymous", and the record carries no abstract.
@Article{Anonymous:2015:TPE,
  author =       "Anonymous",
  title =        "{TALLIP} Perspectives: Editorial Commentary: The State
                 of the Journal",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2823512",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  note =         "Special issue on Chinese spell checking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article describing a printed text image database for Sindhi OCR
%%% (TALLIP volume 15, number 4, June 2016, article 21).
@Article{Hakro:2016:PTI,
  author =       "Dil Nawaz Hakro and Abdullah Zawawi Talib",
  title =        "Printed Text Image Database for {Sindhi} {OCR}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2846093",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Document Image Understanding (DIU) and Electronic
                 Document Management are active fields of research
                 involving image understanding, interpretation,
                 efficient handling, and routing of documents as well as
                 their retrieval. Research on most of the noncursive
                 scripts (Latin) has matured, whereas research on the
                 cursive (connected) scripts is still moving toward
                 perfection. Many researchers are currently working on
                 the cursive scripts (Arabic and other scripts adopting
                 it) around the world so that the difficulties and
                 challenges in document understanding and handling of
                 these scripts can be overcome. Sindhi script has the
                 largest extension of the original Arabic alphabet among
                 languages adopting the Arabic script; it contains 52
                 characters, compared to 28 characters in the original
                 Arabic alphabet, in order to accommodate more sounds
                 for the language. There are 24 differentiating
                 characters with some possessing four dots. For Sindhi
                 OCR research and development, a database is needed for
                 training and testing of Sindhi text images. We have
                 developed a large database containing over 4 billion
                 words and 15 billion characters in 150 various fonts in
                 four font weights and four styles. The database
                 contents were collected from various sources including
                 websites, books, and theses. A custom-built application
                 was also developed to create a text image from a text
                 document that supports various fonts and sizes. The
                 database considers words, characters, characters with
                 spaces, and lines. The database is freely available as
                 a partial or full database by sending an email to one
                 of the authors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Note comparing word segmentation approaches for Burmese (TALLIP
%%% volume 15, number 4, June 2016, article 22).
@Article{Ding:2016:WSB,
  author =       "Chenchen Ding and Ye Kyaw Thu and Masao Utiyama and
                 Eiichiro Sumita",
  title =        "Word Segmentation for {Burmese} ({Myanmar})",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2846095",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Experiments on various word segmentation approaches
                 for the Burmese language are conducted and discussed in
                 this note. Specifically, dictionary-based, statistical,
                 and machine learning approaches are tested.
                 Experimental results demonstrate that statistical and
                 machine learning approaches perform significantly
                 better than dictionary-based approaches. We believe
                 that this note, based on an annotated corpus of
                 relatively considerable size (containing approximately
                 a half million words), is the first systematic
                 comparison of word segmentation approaches for Burmese.
                 This work aims to discover the properties and proper
                 approaches to Burmese textual processing and to promote
                 further researches on this understudied language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article on an image-to-translation pipeline for the endangered Nyushu
%%% script (TALLIP volume 15, number 4, June 2016, article 23).  The
%%% abstract contains the literal placeholder "[Chinese characters]"
%%% where non-ASCII text would appear.
@Article{Zhang:2016:ITP,
  author =       "Tongtao Zhang and Aritra Chowdhury and Nimit Dhulekar
                 and Jinjing Xia and Kevin Knight and Heng Ji and
                 B{\"u}lent Yener and Liming Zhao",
  title =        "From Image to Translation: Processing the Endangered
                 {Nyushu} Script",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2857052",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The lack of computational support has significantly
                 slowed down automatic understanding of endangered
                 languages. In this paper, we take Nyushu (simplified
                 Chinese: [Chinese characters]; literally: ``women's
                 writing'') as a case study to present the first
                 computational approach that combines Computer Vision
                 and Natural Language Processing techniques to deeply
                 understand an endangered language. We developed an
                 end-to-end system to read a scanned hand-written Nyushu
                 article, segment it into characters, link them to
                 standard characters, and then translate the article
                 into Mandarin Chinese. We propose several novel methods
                 to address the new challenges introduced by noisy input
                 and low resources, including Nyushu-specific feature
                 selection for character segmentation and linking, and
                 character linking lattice based Machine Translation.
                 The end-to-end system performance indicates that the
                 system is a promising approach and can serve as a
                 standard benchmark.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article analysing Turkish Web queries gathered through search-engine
%%% query autocompletion services (TALLIP volume 15, number 4, June 2016,
%%% article 24).
@Article{Sarigil:2016:SPW,
  author =       "Erdem Sarigil and Oguz Yilmaz and Ismail Sengor
                 Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy",
  title =        "A ``Suggested'' Picture of {Web} Search in {Turkish}",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2891105",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Although query log analysis provides crucial insights
                 about Web users' search interests, conducting such
                 analyses is almost impossible for some languages, as
                 large-scale and public query logs are quite scarce. In
                 this study, we first survey the existing query
                 collections in Turkish and discuss their limitations.
                 Next, we adopt a novel strategy to obtain a set of
                 Turkish queries using the query autocompletion services
                 from the four major search engines and provide the
                 first large-scale analysis of Web queries and their
                 results in Turkish.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article on classifying printed Gujarati characters with low-level
%%% stroke features (TALLIP volume 15, number 4, June 2016, article 25).
@Article{Goswami:2016:CPG,
  author =       "Mukesh M. Goswami and Suman K. Mitra",
  title =        "Classification of Printed {Gujarati} Characters Using
                 Low-Level Stroke Features",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2856105",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article presents an elegant technique for
                 extracting the low-level stroke features, such as
                 endpoints, junction points, line elements, and curve
                 elements, from offline printed text using a template
                 matching approach. The proposed features are used to
                 classify a subset of characters from Gujarati script.
                 The database consists of approximately 16,782 samples
                 of 42 middle-zone symbols from the Gujarati character
                 set collected from three different sources: machine
                 printed books, newspapers, and laser printed documents.
                 The purpose of this division is to add variety in terms
                 of size, font type, style, ink variation, and boundary
                 deformation. The experiments are performed on the
                 database using a k-nearest neighbor (kNN) classifier
                 and results are compared with other widely used
                 structural features, namely Chain Codes (CC),
                 Directional Element Features (DEF), and Histogram of
                 Oriented Gradients (HoG). The results show that the
                 features are quite robust against the variations and
                 give comparable performance with other existing
                 works.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article introducing CALAM, an annotated Urdu handwritten text image
%%% dataset (TALLIP volume 15, number 4, June 2016, article 26).
@Article{Choudhary:2016:FTA,
  author =       "Prakash Choudhary and Neeta Nain",
  title =        "A Four-Tier Annotated {Urdu} Handwritten Text Image
                 Dataset for Multidisciplinary Research on {Urdu}
                 Script",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "26:1--26:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2857053",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article introduces a large handwritten text
                 document image corpus dataset for Urdu script named
                 CALAM (Cursive And Language Adaptive Methodologies).
                 The database contains unconstrained handwritten
                 sentences along with their structural annotations for
                 the offline handwritten text images with their XML
                 representation. Urdu is the fourth most frequently used
                 language in the world, but due to its complex cursive
                 writing script and low resources, it is still a thrust
                 area for document image analysis. Here, a unified
                 approach is applied in the development of an Urdu
                 corpus by collecting printed texts, handwritten texts,
                 and demographic information of writers on a single
                 form. CALAM contains 1,200 handwritten text images,
                 3,043 lines, 46,664 words, and 101,181 ligatures. For
                 capturing maximum variance among the words and
                 handwritten styles, data collection is distributed
                 among six categories and 14 subcategories. Handwritten
                 forms were filled out by 725 different writers
                 belonging to different geographical regions, ages, and
                 genders with diverse educational backgrounds. A
                 structure has been designed to annotate handwritten
                 Urdu script images at line, word, and ligature levels
                 with an XML standard to provide a ground truth of each
                 image at different levels of annotation. This corpus
                 would be very useful for linguistic research in
                 benchmarking and providing a testbed for evaluation of
                 handwritten text recognition techniques for Urdu
                 script, signature verification, writer identification,
                 digital forensics, classification of printed and
                 handwritten text, categorization of texts as per use,
                 and so on. The experimental results of some recently
                 developed handwritten text line segmentation techniques
                 experimented on the proposed dataset are also presented
                 in the article for asserting its viability and
                 usability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article on a language model implementation built on double-array
%%% structures (TALLIP volume 15, number 4, June 2016, article 27).
@Article{Norimatsu:2016:FCL,
  author =       "Jun-Ya Norimatsu and Makoto Yasuhara and Toru Tanaka
                 and Mikio Yamamoto",
  title =        "A Fast and Compact Language Model Implementation Using
                 Double-Array Structures",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "27:1--27:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2873068",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The language model is a widely used component in
                 fields such as natural language processing, automatic
                 speech recognition, and optical character recognition.
                 In particular, statistical machine translation uses
                 language models, and the translation speed and the
                 amount of memory required are greatly affected by the
                 performance of the language model implementation. We
                 propose a fast and compact implementation of $n$-gram
                 language models that increases query speed and reduces
                 memory usage by using a double-array structure, which
                 is known to be a fast and compact trie data structure.
                 We propose two types of implementation: one for
                 backward suffix trees and the other for reverse tries.
                 The data structure is optimized for space efficiency by
                 embedding model parameters into otherwise unused spaces
                 in the double-array structure. We show that the reverse
                 trie version of our method is among the smallest
                 state-of-the-art implementations in terms of model size
                 with almost the same speed as the implementation that
                 performs fastest on perplexity calculation tasks.
                 Similarly, we achieve faster decoding while keeping
                 compact model sizes, and we confirm that our method can
                 utilize the efficiency of the double-array structure to
                 achieve a balance between speed and size on translation
                 tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% Article on learning generalized (cluster-based) features for semantic
%%% role labeling (TALLIP volume 15, number 4, June 2016, article 28).
@Article{Yang:2016:LGF,
  author =       "Haitong Yang and Chengqing Zong",
  title =        "Learning Generalized Features for Semantic Role
                 Labeling",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "28:1--28:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2890496",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article makes an effort to improve Semantic Role
                 Labeling (SRL) through learning generalized features.
                 The SRL task is usually treated as a supervised
                 problem. Therefore, a huge set of features are crucial
                 to the performance of SRL systems. But these features
                 often lack generalization powers when predicting an
                 unseen argument. This article proposes a simple
                 approach to relieve the issue. A strong intuition is
                 that arguments occurring in similar syntactic positions
                 are likely to bear the same semantic role, and,
                 analogously, arguments that are lexically similar are
                 likely to represent the same semantic role. Therefore,
                 it will be informative to SRL if syntactic or lexical
                 similar arguments can activate the same feature.
                 Inspired by this, we embed the information of
                 lexicalization and syntax into a feature vector for
                 each argument and then use $K$-means to make clustering
                 for all feature vectors of training set. For an unseen
                 argument to be predicted, it will belong to the same
                 cluster as its similar arguments of training set.
                 Therefore, the clusters can be thought of as a kind of
                 generalized feature. We evaluate our method on several
                 benchmarks. The experimental results show that our
                 approach can significantly improve the SRL
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bhowmik:2016:BHC,
  author =       "Tapan Kumar Bhowmik and Swapan Kumar Parui and Utpal
                 Roy and Lambert Schomaker",
  title =        "{Bangla} Handwritten Character Segmentation Using
                 Structural Features: a Supervised and Bootstrapping
                 Approach",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "4",
  pages =        "29:1--29:??",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2890497",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, we propose a new framework for
                 segmentation of Bangla handwritten word images into
                 meaningful individual symbols or pseudo-characters.
                 Existing segmentation algorithms are not usually
                 treated as a classification problem. However, in the
                 present study, the segmentation algorithm is looked
                 upon as a two-class supervised classification problem.
                 The method employs an SVM classifier to select the
                 segmentation points on the word image on the basis of
                 various structural features. For training of the SVM
                 classifier, an unannotated training set is prepared
                 first using candidate segmenting points. The training
                 set is then clustered, and each cluster is labeled
                 manually with minimal manual intervention. A
                 semi-automatic bootstrapping technique is also employed
                 to enlarge the training set from new samples. The
                 overall architecture describes a basic step toward
                 building an annotation system for the segmentation
                 problem, which has not so far been investigated. The
                 experimental results show that our segmentation method
                 is quite efficient in segmenting not only word images
                 but also handwritten texts. As a part of this work, a
                 database of Bangla handwritten word images has also
                 been developed. Considering our data collection method
                 and a statistical analysis of our lexicon set, we claim
                 that the relevant characteristics of an ideal lexicon
                 set are present in our handwritten word image
                 database.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Singh:2016:OHG,
  author =       "Sukhdeep Singh and Anuj Sharma and Indu Chhabra",
  title =        "Online Handwritten {Gurmukhi} Strokes Dataset Based on
                 Minimal Set of Words",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2896318",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The online handwriting data are an integral part of
                 data analysis and classification research, as collected
                 handwritten data offers many challenges to group
                 handwritten stroke classes. The present work has been
                 done for grouping handwritten strokes from the Indic
                 script Gurmukhi. Gurmukhi is the script of the popular
                 and widely spoken language Punjabi. The present work
                 includes development of the dataset of Gurmukhi words
                 in the context of online handwriting recognition for
                 real-life use applications, such as maps navigation. We
                 have collected the data of 100 writers from the largest
                 cities in the Punjab region. The writers' variations,
                 such as writing skill level (beginner, moderate, and
                 expert), gender, right or left handedness, and their
                 adaptability to digital handwriting, have been
                 considered in dataset development. We have introduced a
                 novel technique to form handwritten stroke classes
                 based on a limited set of words. The presence of all
                 alphabets including vowels of Gurmukhi script has been
                 considered before selection of a word. The developed
                 dataset includes 39,411 strokes from handwritten words
                 and forms 72 classes of strokes after using a k-means
                 clustering technique and manual verification through
                 expert and moderate writers. We have achieved
                 recognition results using the Hidden Markov Model as
                 87.10\%, 85.43\%, and 84.33\% for middle zone strokes
                 when using training data as 66\%, 50\%, and 80\% of the
                 developed dataset. The present work is a step in a
                 direction to find groups for unknown handwriting
                 strokes with reasonably higher levels of accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{El-Fiqi:2016:PCC,
  author =       "Heba El-Fiqi and Eleni Petraki and Hussein A. Abbass",
  title =        "Pairwise Comparative Classification for Translator
                 Stylometric Analysis",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2898997",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, we present a new type of
                 classification problem, which we call Comparative
                 Classification Problem (CCP), where we use the term
                 data record to refer to a block of instances. Given a
                 single data record with n instances for n classes, the
                 CCP problem is to map each instance to a unique class.
                 This problem occurs in a wide range of applications
                 where the independent and identically distributed
                 assumption is broken down. The primary difference
                 between CCP and classical classification is that in the
                 latter, the assignment of a translator to one record is
                 independent of the assignment of a translator to a
                 different record. In CCP, however, the assignment of a
                 translator to one record within a block excludes this
                 translator from further assignments to any other record
                 in that block. The interdependency in the data poses
                 challenges for techniques relying on the independent
                 and identically distributed (iid) assumption. In the
                 Pairwise CCP (PWCCP), a pair of records is grouped
                 together. The key difference between PWCCP and
                 classical binary classification problems is that hidden
                 patterns can only be unmasked by comparing the
                 instances as pairs. In this article, we introduce a new
                 algorithm, PWC4.5, which is based on C4.5, to manage
                 PWCCP. We first show that a simple transformation---that
                 we call Gradient-Based Transformation (GBT)---can fix the
                 problem of iid in C4.5. We then evaluate PWC4.5 using
                 two real-world corpora to distinguish between
                 translators on Arabic-English and French-English
                 translations. While the traditional C4.5 failed to
                 distinguish between different translators, GBT
                 demonstrated better performance. Meanwhile, PWC4.5
                 consistently provided the best results over C4.5 and
                 GBT.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Qiao:2016:IUD,
  author =       "Xiuming Qiao and Hailong Cao and Tiejun Zhao",
  title =        "Improving Unsupervised Dependency Parsing with
                 Knowledge from Query Logs",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2903720",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Unsupervised dependency parsing becomes more and more
                 popular in recent years because it does not need
                 expensive annotations, such as treebanks, which are
                 required for supervised and semi-supervised dependency
                 parsing. However, its accuracy is still far below that
                 of supervised dependency parsers, partly due to the
                 fact that their parsing model is insufficient to
                 capture linguistic phenomena underlying texts. The
                 performance for unsupervised dependency parsing can be
                 improved by mining knowledge from the texts and by
                 incorporating it into the model. In this article,
                 syntactic knowledge is acquired from query logs to help
                 estimate better probabilities in dependency models with
                 valence. The proposed method is language independent
                 and obtains an improvement of 4.1\% unlabeled accuracy
                 on the Penn Chinese Treebank by utilizing additional
                 dependency relations from the Sogou query logs and
                 Baidu query logs. Moreover, experiments show that the
                 proposed model achieves improvements of 8.07\% on CoNLL
                 2007 English using the AOL query logs. We believe query
                 logs are useful sources of syntactic knowledge for many
                 natural language processing (NLP) tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Passban:2016:BNP,
  author =       "Peyman Passban and Qun Liu and Andy Way",
  title =        "Boosting Neural {POS} Tagger for {Farsi} Using
                 Morphological Information",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2934676",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Farsi (Persian) is a low-resource language that
                 suffers from the data sparsity problem and a lack of
                 efficient processing tools. Due to their broad
                 application in natural language processing tasks,
                 part-of-speech (POS) taggers are one of those important
                 tools that should be considered in this respect.
                 Despite recent work on Farsi tagging, there is still
                 room for improvement. The best reported accuracy so far
                 is 96\%, which in special cases can rise to 96.9\%. The
                 main problem with existing taggers is their
                 inefficiency in coping with out-of-vocabulary (OOV)
                 words. Addressing both problems of accuracy and OOV
                 words, we developed a neural network-based POS tagger
                 (NPT) that performs efficiently on Farsi. Despite using
                 less data, NPT provides better results in comparison to
                 state-of-the-art systems. Our proposed tagger performs
                 with an accuracy of 97.4\%, with performance highly
                 influenced by morphological features. We carry out a
                 shallow morphological analysis and show considerable
                 improvement over the baseline configuration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2016:SBM,
  author =       "Liangliang Liu and Cungen Cao",
  title =        "A Seed-Based Method for Generating {Chinese} Confusion
                 Sets",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2933396",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In natural language, people often misuse a word
                 (called a ``confused word'') in place of other words
                 (called ``confusing words''). In misspelling
                 corrections, many approaches to finding and correcting
                 misspelling errors are based on a simple notion called
                 a ``confusion set.'' The confusion set of a confused
                 word consists of confusing words. In this article, we
                 propose a new method of building Chinese character
                 confusion sets. Our method is composed of two major
                 phases. In the first phase, we build a list of seed
                 confusion sets for each Chinese character, which is
                 based on measuring similarity in character pinyin or
                 similarity in character shape. In this phase, all
                 confusion sets are constructed manually, and the
                 confusion sets are organized into a graph, called a
                 ``seed confusion graph'' (SCG), in which vertices
                 denote characters and edges are pairs of characters in
                 the form (confused character, confusing character). In
                 the second phase, we extend the SCG by acquiring more
                 pairs of (confused character, confusing character) from
                 a large Chinese corpus. For this, we use several word
                 patterns (or patterns) to generate new confusion pairs
                 and then verify the pairs before adding them into a
                 SCG. Comprehensive experiments show that our method of
                 extending confusion sets is effective. Also, we shall
                 use the confusion sets in Chinese misspelling
                 corrections to show the utility of our method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2016:ISP,
  author =       "Junhui Li and Muhua Zhu and Wei Lu and Guodong Zhou",
  title =        "Improving Semantic Parsing with Enriched Synchronous
                 Context-Free Grammars in Statistical Machine
                 Translation",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2963099",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Semantic parsing maps a sentence in natural language
                 into a structured meaning representation. Previous
                 studies show that semantic parsing with synchronous
                 context-free grammars (SCFGs) achieves favorable
                 performance over most other alternatives. Motivated by
                 the observation that the performance of semantic
                 parsing with SCFGs is closely tied to the translation
                 rules, this article explores to extend translation
                 rules with high quality and increased coverage in three
                 ways. First, we examine the difference between word
                 alignments for semantic parsing and statistical machine
                 translation (SMT) to better adapt word alignment in SMT
                 to semantic parsing. Second, we introduce both
                 structure and syntax informed nonterminals, better
                 guiding the parsing in favor of well-formed structure,
                 instead of using an uninformed nonterminal in SCFGs.
                 Third, we address the unknown word translation issue
                 via synthetic translation rules. Last but not least, we
                 use a filtering approach to improve performance via
                 predicting answer type. Evaluation on the standard
                 GeoQuery benchmark dataset shows that our approach
                 greatly outperforms the state of the art across various
                 languages, including English, Chinese, Thai, German,
                 and Greek.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Krishnamurthi:2016:UDS,
  author =       "Karthik Krishnamurthi and Vijayapal Reddy Panuganti
                 and Vishnu Vardhan Bulusu",
  title =        "Understanding Document Semantics from Summaries: a
                 Case Study on {Hindi} Texts",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2956236",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:51 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Summary of a document contains words that actually
                 contribute to the semantics of the document. Latent
                 Semantic Analysis (LSA) is a mathematical model that is
                 used to understand document semantics by deriving a
                 semantic structure based on patterns of word
                 correlations in the document. When using LSA to capture
                 semantics from summaries, it is observed that LSA
                 performs quite well despite being completely
                 independent of any external sources of semantics.
                 However, LSA can be remodeled to enhance its capability
                 to analyze correlations within texts. By taking
                 advantage of the model being language independent, this
                 article presents two stages of LSA remodeling to
                 understand document semantics in the Indian context,
                 specifically from Hindi text summaries. One stage of
                 remodeling is done by providing supplementary
                 information, such as document category and domain
                 information. The second stage of remodeling is done by
                 using a supervised term weighting measure in the
                 process. The remodeled LSA's performance is empirically
                 evaluated in a document classification application by
                 comparing the accuracies of classification to plain
                 LSA. An improvement in the performance of LSA in the
                 range of 4.7\% to 6.2\% is achieved from the remodel
                 when compared to the plain model. The results suggest
                 that summaries of documents efficiently capture the
                 semantic structure of documents and is an alternative
                 to full-length documents for understanding document
                 semantics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Tursun:2016:STT,
  author =       "Eziz Tursun and Debasis Ganguly and Turghun Osman and
                 Ya-Ting Yang and Ghalip Abdukerim and Jun-Lin Zhou and
                 Qun Liu",
  title =        "A Semisupervised Tag-Transition-Based {Markovian}
                 Model for {Uyghur} Morphology Analysis",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2968410",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Morphological analysis, which includes analysis of
                 part-of-speech (POS) tagging, stemming, and morpheme
                 segmentation, is one of the key components in natural
                 language processing (NLP), particularly for
                 agglutinative languages. In this article, we
                 investigate the morphological analysis of the Uyghur
                 language, which is the native language of the people in
                 the Xinjiang Uyghur autonomous region of western China.
                 Morphological analysis of Uyghur is challenging
                 primarily because of factors such as (1) ambiguities
                 arising due to the likelihood of association of a
                 multiple number of POS tags with a word stem or a
                 multiple number of functional tags with a word suffix,
                 (2) ambiguous morpheme boundaries, and (3) complex
                 morphophonology of the language. Further, the
                 unavailability of a manually annotated training set in
                 the Uyghur language for the purpose of word
                 segmentation makes Uyghur morphological analysis more
                 difficult. In our proposed work, we address these
                 challenges by undertaking a semisupervised approach of
                 learning a Markov model with the help of a manually
                 constructed dictionary of ``suffix to tag'' mappings in
                 order to predict the most likely tag transitions in the
                 Uyghur morpheme sequence. Due to the linguistic
                 characteristics of Uyghur, we incorporate a prior
                 belief in our model for favoring word segmentations
                 with a lower number of morpheme units. Empirical
                 evaluation of our proposed model shows an accuracy of
                 about 82\%. We further improve the effectiveness of the
                 tag transition model with an active learning paradigm.
                 In particular, we manually investigated a subset of
                 words for which the model prediction ambiguity was
                 within the top 20\%. Manually incorporating rules to
                 handle these erroneous cases resulted in an overall
                 accuracy of 93.81\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Nguyen:2016:ACN,
  author =       "Long H. B. Nguyen and Dien Dinh and Phuoc Tran",
  title =        "An Approach to Construct a Named Entity Annotated
                 {English--Vietnamese} Bilingual Corpus",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2990191",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Manually constructing an annotated Named Entity (NE)
                 in a bilingual corpus is a time-consuming,
                 labor-intensive, and expensive process, but this is
                 necessary for natural language processing (NLP) tasks
                 such as cross-lingual information retrieval,
                 cross-lingual information extraction, machine
                 translation, etc. In this article, we present an
                 automatic approach to construct an annotated NE in
                 English-Vietnamese bilingual corpus from a bilingual
                 parallel corpus by proposing an aligned NE method.
                 Basing this corpus on a bilingual corpus in which the
                 initial NEs are extracted from its own language
                 separately, the approach tries to correct unrecognized
                 NEs or incorrectly recognized NEs before aligning the
                 NEs by using a variety of bilingual constraints. The
                 generated corpus not only improves the NE recognition
                 results but also creates alignments between English NEs
                 and Vietnamese NEs, which are necessary for training NE
                 translation models. The experimental results show that
                 the approach outperforms the baseline methods
                 effectively. In the English-Vietnamese NE alignment
                 task, the F-measure increases from 68.58\% to 79.77\%.
                 Thanks to the improvement of the NE recognition
                 quality, the proposed method also increases
                 significantly: the F-measure goes from 84.85\% to
                 88.66\% for the English side and from 75.71\% to
                 85.55\% for the Vietnamese side. By providing the
                 additional semantic information for the machine
                 translation systems, the BLEU score increases from
                 33.04\% to 45.11\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chou:2016:BWN,
  author =       "Chien-Lung Chou and Chia-Hui Chang and Ya-Yun Huang",
  title =        "Boosted {Web} Named Entity Recognition via
                 Tri-Training",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2963100",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Named entity extraction is a fundamental task for many
                 natural language processing applications on the web.
                 Existing studies rely on annotated training data, which
                 is quite expensive to obtain large datasets, limiting
                 the effectiveness of recognition. In this research, we
                 propose a semisupervised learning approach for web
                 named entity recognition (NER) model construction via
                 automatic labeling and tri-training. The former
                 utilizes structured resources containing known named
                 entities for automatic labeling, while the latter makes
                 use of unlabeled examples to improve the extraction
                 performance. Since this automatically labeled training
                 data may contain noise, a self-testing procedure is
                 used as a follow-up to remove low-confidence annotation
                 and prepare higher-quality training data. Furthermore,
                 we modify tri-training for sequence labeling and derive
                 a proper initialization for large dataset training to
                 improve entity recognition. Finally, we apply this
                 semisupervised learning framework for person name
                 recognition, business organization name recognition,
                 and location name extraction. In the task of Chinese
                 NER, an F-measure of 0.911, 0.849, and 0.845 can be
                 achieved, for person, business organization, and
                 location NER, respectively. The same framework is also
                 applied for English and Japanese business organization
                 name recognition and obtains models with performance of
                 a 0.832 and 0.803 F-measure.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sadek:2016:DBA,
  author =       "Jawad Sadek and Farid Meziane",
  title =        "A Discourse-Based Approach for {Arabic} Question
                 Answering",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2988238",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The treatment of complex questions with explanatory
                 answers involves searching for arguments in texts.
                 Because of the prominent role that discourse relations
                 play in reflecting text producers' intentions,
                 capturing the underlying structure of text constitutes
                 a good instructor in this issue. From our extensive
                 review, a system for automatic discourse analysis that
                 creates full rhetorical structures in large-scale
                 Arabic texts is currently unavailable. This is due to
                 the high computational complexity involved in
                 processing a large number of hypothesized relations
                 associated with large texts. Therefore, more practical
                 approaches should be investigated. This article
                 presents a new Arabic Text Parser oriented for
                 question-answering systems dealing with [Arabic
                 characters] ``why'' and [Arabic characters] ``how to''
                 questions. The Text Parser presented here considers the
                 sentence as the basic unit of text and incorporates a
                 set of heuristics to avoid computational explosion.
                 With this approach, the developed question-answering
                 system reached a significant improvement over the
                 baseline with a Recall of 68\% and MRR of 0.62.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Tran:2016:WRS,
  author =       "Phuoc Tran and Dien Dinh and Long H. B. Nguyen",
  title =        "Word Re-Segmentation in {Chinese--Vietnamese} Machine
                 Translation",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2988237",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In isolated languages, such as Chinese and Vietnamese,
                 words are not separated by spaces, and a word may be
                 formed by one or more syllables. Therefore, word
                 segmentation (WS) is usually the first process that is
                 implemented in the machine translation process. WS in
                 the source and target languages is based on different
                 training corpora, and WS approaches may not be the
                 same. Therefore, the WS that results in these two
                 languages are not often homologous, and thus word
                 alignment results in many 1-n and n-1 alignment pairs
                 in statistical machine translation, which degrades the
                 performance of machine translation. In this article, we
                 will adjust the WS for both Chinese and Vietnamese in
                 particular and for isolated language pairs in general
                 and make the word boundary of the two languages more
                 symmetric in order to strengthen 1-1 alignments and
                 enhance machine translation performance. We have tested
                 this method on the Computational Linguistics Center's
                 corpus, which consists of 35,623 sentence pairs. The
                 experimental results show that our method has
                 significantly improved the performance of machine
                 translation compared to the baseline translation
                 system, WS translation system, and anchor
                 language-based WS translation systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2016:MSC,
  author =       "Peifeng Li and Guodong Zhou and Qiaoming Zhu",
  title =        "Minimally Supervised {Chinese} Event Extraction from
                 Multiple Views",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2994600",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Although several semi-supervised learning models have
                 been proposed for English event extraction, there are
                 few successful stories in Chinese due to its special
                 characteristics. In this article, we propose a novel
                 minimally supervised model for Chinese event extraction
                 from multiple views. Besides the traditional pattern
                 similarity view (PSV), a semantic relationship view
                 (SRV) is introduced to capture the relevant event
                 mentions from relevant documents. Moreover, a
                 morphological structure view (MSV) is incorporated to
                 both infer more positive patterns and help filter
                 negative patterns via morphological structure
                 similarity. An evaluation of the ACE 2005 Chinese
                 corpus shows that our minimally supervised model
                 significantly outperforms several strong baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Atreya:2016:QER,
  author =       "Arjun {Atreya V} and Ashish Kankaria and Pushpak
                 Bhattacharyya and Ganesh Ramakrishnan",
  title =        "Query Expansion in Resource-Scarce Languages: a
                 Multilingual Framework Utilizing Document Structure",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "2",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2997643",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Retrievals in response to queries to search engines in
                 resource-scarce languages often produce no results,
                 which annoys the user. In such cases, at least
                 partially relevant documents must be retrieved. We
                 propose a novel multilingual framework, MultiStructPRF,
                 which expands the query with related terms by (i) using
                 a resource-rich assisting language and (ii) giving
                 varied importance to the expansion terms depending on
                 their position of occurrence in the document. Our
                 system uses the help of an assisting language to expand
                 the query in order to improve system recall. We propose
                 a systematic expansion model for weighting the
                 expansion terms coming from different parts of the
                 document. To combine the expansion terms from query
                 language and assisting language, we propose a
                 heuristics-based fusion model. Our experimental results
                 show an improvement over other PRF techniques in both
                 precision and recall for multiple resource-scarce
                 languages like Marathi, Bengali, Odia, Finnish, and the
                 like. We study the effect of different assisting
                 languages on precision and recall for multiple query
                 languages. Our experiments reveal an interesting fact:
                 Precision is positively correlated with the typological
                 closeness of query language and assisting language,
                 whereas recall is positively correlated with the
                 resource richness of the assisting language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Finch:2017:IBL,
  author =       "Andrew Finch and Taisuke Harada and Kumiko
                 Tanaka-Ishii and Eiichiro Sumita",
  title =        "Inducing a Bilingual Lexicon from Short Parallel
                 Multiword Sequences",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3003726",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "This article proposes a technique for mining bilingual
                 lexicons from pairs of parallel short word sequences.
                 The technique builds a generative model from a corpus
                 of training data consisting of such pairs. The model is
                 a hierarchical nonparametric Bayesian model that
                 directly induces a bilingual lexicon while training.
                 The model learns in an unsupervised manner and is
                 designed to exploit characteristics of the language
                 pairs being mined. The proposed model is capable of
                 utilizing commonly used word-pair frequency information
                 and additionally can employ the internal character
                 alignments within the words themselves. It is thereby
                 capable of mining transliterations and can use reliably
                 aligned transliteration pairs to support the mining of
                 other words in their context. The model is also capable
                 of performing word reordering and word deletion during
                 the alignment process, and it is furthermore capable of
                 operating in the absence of full segmentation
                 information. In this work, we study two mining tasks
                 based on English--Japanese and English--Chinese language
                 pairs, and compare the proposed approach to baselines
                 based on simpler models that use only word-pair
                 frequency information. Our results show that the
                 proposed method is able to mine bilingual word pairs at
                 higher levels of precision and recall than the
                 baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2017:CSC,
  author =       "Shaonan Wang and Chengqing Zong",
  title =        "Comparison Study on Critical Components in Composition
                 Model for Phrase Representation",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3010088",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Phrase representation, an important step in many NLP
                 tasks, involves representing phrases as
                 continuous-valued vectors. This article presents
                 detailed comparisons concerning the effects of word
                 vectors, training data, and the composition and
                 objective function used in a composition model for
                 phrase representation. Specifically, we first discuss
                 how the augmented word representations affect the
                 performance of the composition model. Then, we
                 investigate whether different types of training data
                 influence the performance of the composition model and,
                 if so, how they influence it. Finally, we evaluate
                 combinations of different composition and objective
                 functions and discuss the factors related to
                 composition model performance. All evaluations were
                 conducted in both English and Chinese. Our main
                 findings are as follows: (1) The Additive model with
                 semantic enhanced word vectors performs comparably to
                 the state-of-the-art model; (2) The Additive model
                 which updates augmented word vectors and the Matrix
                 model with semantic enhanced word vectors
                 systematically outperforms the state-of-the-art model
                 in bigram and multi-word phrase similarity task,
                 respectively; (3) Representing the high frequency
                 phrases by estimating their surrounding contexts is a
                 good training objective for bigram phrase similarity
                 tasks; and (4) The performance gain of composition
                 model with semantic enhanced word vectors is due to the
                 composition function and the greater weight attached to
                 important words. Previous works focus on the
                 composition function; however, our findings indicate
                 that other components in the composition model
                 (especially word representation) make a critical
                 difference in phrase representation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bhat:2017:ITB,
  author =       "Riyaz Ahmad Bhat and Irshad Ahmad Bhat and Dipti Misra
                 Sharma",
  title =        "Improving Transition-Based Dependency Parsing of
                 {Hindi} and {Urdu} by Modeling Syntactically Relevant
                 Phenomena",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3005447",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In recent years, transition-based parsers have shown
                 promise in terms of efficiency and accuracy. Though
                 these parsers have been extensively explored for
                 multiple Indian languages, there is still considerable
                 scope for improvement by properly incorporating
                 syntactically relevant information. In this article, we
                 enhance transition-based parsing of Hindi and Urdu by
                 redefining the features and feature extraction
                 procedures that have been previously proposed in the
                 parsing literature of Indian languages. We propose and
                 empirically show that properly incorporating
                 syntactically relevant information like case marking,
                 complex predication and grammatical agreement in an
                 arc-eager parsing model can significantly improve
                 parsing accuracy. Our experiments show an absolute
                 improvement of $ \approx 2 $\% LAS for parsing of both
                 Hindi and Urdu over a competitive baseline which uses
                 rich features like part-of-speech (POS) tags, chunk
                 tags, cluster ids and lemmas. We also propose some
                 heuristics to identify ezafe constructions in Urdu
                 texts which show promising results in parsing these
                 constructions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Das:2017:NER,
  author =       "Arjun Das and Debasis Ganguly and Utpal Garain",
  title =        "Named Entity Recognition with Word Embeddings and
                 {Wikipedia} Categories for a Low-Resource Language",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3015467",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "In this article, we propose a word embedding--based
                 named entity recognition (NER) approach. NER is
                 commonly approached as a sequence labeling task with
                 the application of methods such as conditional random
                 field (CRF). However, for low-resource languages
                 without the presence of sufficiently large training
                 data, methods such as CRF do not perform well. In our
                 work, we make use of the proximity of the vector
                 embeddings of words to approach the NER problem. The
                 hypothesis is that word vectors belonging to the same
                 name category, such as a person's name, occur in close
                 vicinity in the abstract vector space of the embedded
                 words. Assuming that this clustering hypothesis is
                 true, we apply a standard classification approach on
                 the vectors of words to learn a decision boundary
                 between the NER classes. Our NER experiments are
                 conducted on a morphologically rich and low-resource
                 language, namely Bengali. Our approach significantly
                 outperforms standard baseline CRF approaches that use
                 cluster labels of word embeddings and gazetteers
                 constructed from Wikipedia. Further, we propose an
                 unsupervised approach (that uses an automatically
                 created named entity (NE) gazetteer from Wikipedia in
                 the absence of training data). For a low-resource
                 language, the word vectors obtained from Wikipedia are
                 not sufficient to train a classifier. As a result, we
                 propose to make use of the distance measure between the
                 vector embeddings of words to expand the set of
                 Wikipedia training examples with additional NEs
                 extracted from a monolingual corpus that yield
                 significant improvement in the unsupervised NER
                 performance. In fact, our expansion method performs
                 better than the traditional CRF-based (supervised)
                 approach (i.e., F-score of 65.4\% vs. 64.2\%). Finally,
                 we compare our proposed approach to the official
                 submission for the IJCNLP-2008 Bengali NER shared task
                 and achieve an overall improvement of F-score 11.26\%
                 with respect to the best official system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2017:IDR,
  author =       "Haoran Li and Jiajun Zhang and Chengqing Zong",
  title =        "Implicit Discourse Relation Recognition for {English}
                 and {Chinese} with Multiview Modeling and Effective
                 Representation Learning",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3028772",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Discourse relations between two text segments play an
                 important role in many Natural Language Processing
                 (NLP) tasks. The connectives strongly indicate the
                 sense of discourse relations, while in fact, there are
                 no connectives in a large proportion of discourse
                 relations, that is, implicit discourse relations.
                 Compared with explicit relations, implicit relations
                 are much harder to detect and have drawn significant
                 attention. Until now, there have been many studies
                 focusing on English implicit discourse relations, and
                 few studies address implicit relation recognition in
                 Chinese even though the implicit discourse relations in
                 Chinese are more common than those in English. In our
                 work, both the English and Chinese languages are our
                 focus. The key to implicit relation prediction is to
                 properly model the semantics of the two discourse
                 arguments, as well as the contextual interaction
                 between them. To achieve this goal, we propose a neural
                 network based framework that consists of two
                 hierarchies. The first one is the model hierarchy, in
                 which we propose a max-margin learning method to
                 explore the implicit discourse relation from multiple
                 views. The second one is the feature hierarchy, in
                 which we learn multilevel distributed representations
                 from words, arguments, and syntactic structures to
                 sentences. We have conducted experiments on the
                 standard benchmarks of English and Chinese, and the
                 results show that compared with several methods our
                 proposed method can achieve the best performance in
                 most cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Tholpadi:2017:CBT,
  author =       "Goutham Tholpadi and Chiranjib Bhattacharyya and
                 Shirish Shevade",
  title =        "Corpus-Based Translation Induction in {Indian}
                 Languages Using Auxiliary Language Corpora from
                 {Wikipedia}",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3038295",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Identifying translations from comparable corpora is a
                 well-known problem with several applications. Existing
                 methods rely on linguistic tools or high-quality
                 corpora. Absence of such resources, especially in
                 Indian languages, makes this problem hard; for example,
                 state-of-the-art techniques achieve a mean reciprocal
                 rank of 0.66 for English--Italian, and a mere 0.187 for
                 Telugu--Kannada. In this work, we address the problem of
                 comparable corpora-based translation correspondence
                 induction (CC-TCI) when the only resources available
                 are small noisy comparable corpora extracted from
                 Wikipedia. We observe that translations in the source
                 and target languages have many topically related words
                 in common in other ``auxiliary'' languages. To model
                 this, we define the notion of a translingual theme, a
                 set of topically related words from auxiliary language
                 corpora, and present a probabilistic framework for
                 CC-TCI. Extensive experiments on 35 comparable corpora
                 showed dramatic improvements in performance. We extend
                 these ideas to propose a method for measuring
                 cross-lingual semantic relatedness (CLSR) between
                 words. To stimulate further research in this area, we
                 make publicly available two new high-quality
                 human-annotated datasets for CLSR. Experiments on the
                 CLSR datasets show more than 200\% improvement in
                 correlation on the CLSR task. We apply the method to
                 the real-world problem of cross-lingual Wikipedia title
                 suggestion and build the WikiTSu system. A user study
                 on WikiTSu shows a 20\% improvement in the quality of
                 titles suggested.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zhao:2017:HMC,
  author =       "Hai Zhao and Deng Cai and Yang Xin and Yuzhu Wang and
                 Zhongye Jia",
  title =        "A Hybrid Model for {Chinese} Spelling Check",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "3",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047405",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Spelling check for Chinese has more challenging
                 difficulties than that for other languages. A hybrid
                 model for Chinese spelling check is presented in this
                 article. The hybrid model consists of three components:
                 one graph-based model for generic errors and two
                 independently trained models for specific errors. In
                 the graph model, a directed acyclic graph is generated
                 for each sentence, and the single-source shortest-path
                 algorithm is performed on the graph to detect and
                 correct general spelling errors at the same time. Prior
                 to that, two types of errors over functional words
                 (characters) are first solved by conditional random
                 fields: the confusion of ``[Chinese characters]'' (at)
                 (pinyin is zai in Chinese), ``[Chinese characters]''
                 (again, more, then) (pinyin: zai) and ``[Chinese
                 characters]'' (of) (pinyin: de), ``[Chinese
                 characters]'' (-ly, adverb-forming particle) (pinyin:
                 de), and ``[Chinese characters]'' (so that, have to)
                 (pinyin: de). Finally, a rule-based model is exploited
                 to distinguish pronoun usage confusion: ``[Chinese
                 characters]'' (she) (pinyin: ta), ``[Chinese
                 characters]'' (he) (pinyin: ta), and some other common
                 collocation errors. The proposed model is evaluated on
                 the standard datasets released by the SIGHAN Bake-off
                 shared tasks, giving state-of-the-art results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wali:2017:ECL,
  author =       "Wafa Wali and Bilel Gargouri and Adelmajid
                 {Ben Hamadou}",
  title =        "Evaluating the Content of {LMF} Standardized
                 Dictionaries: a Practical Experiment on {Arabic}
                 Language",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "4",
  pages =        "22:1--22:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047406",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Since the age of paper versions, dictionaries are
                 often published with anomalies in their content
                 resulting from lexicographer's mistakes or from the
                 lack of efficiency of automatic enrichment systems.
                 Many of these anomalies are expensive to manually
                 detect and difficult to automatically control, notably
                 with lightly structured models of dictionaries. In this
                 article, we take advantage of the fine structure
                 proposed by the Lexical Markup Framework (LMF) norm to
                 investigate the detection of anomalies in the content
                 of LMF normalized dictionaries. First, we give a
                 theoretical study on the plausible anomalies, such as
                 inconsistency, incoherence, redundancy, and
                 incompleteness. Second, we detail the approach that we
                 propose for the automatic detection of such anomalies.
                 Finally, we report on an experiment carried out on an
                 available normalized dictionary of the Arabic language.
                 The experiment has shown that the proposed approach
                 gives reasonable results in terms of precision and
                 recall.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Baly:2017:STM,
  author =       "Ramy Baly and Hazem Hajj and Nizar Habash and Khaled
                 Bashir Shaban and Wassim El-Hajj",
  title =        "A Sentiment {Treebank} and Morphologically Enriched
                 Recursive Deep Models for Effective Sentiment Analysis
                 in {Arabic}",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "4",
  pages =        "23:1--23:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3086576",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Accurate sentiment analysis models encode the
                 sentiment of words and their combinations to predict
                 the overall sentiment of a sentence. This task becomes
                 challenging when applied to morphologically rich
                 languages (MRL). In this article, we evaluate the use
                 of deep learning advances, namely the Recursive Neural
                 Tensor Networks (RNTN), for sentiment analysis in
                 Arabic as a case study of MRLs. While Arabic may not be
                 considered the only representative of all MRLs, the
                 challenges faced and proposed solutions in Arabic are
                 common to many other MRLs. We identify, illustrate, and
                 address MRL-related challenges and show how RNTN is
                 affected by the morphological richness and orthographic
                 ambiguity of the Arabic language. To address the
                 challenges with sentiment extraction from text in MRL,
                 we propose to explore different orthographic features
                 as well as different morphological features at multiple
                 levels of abstraction ranging from raw words to roots.
                 A key requirement for RNTN is the availability of a
                 sentiment treebank; a collection of syntactic parse
                 trees annotated for sentiment at all levels of
                 constituency and that currently only exists in English.
                 Therefore, our contribution also includes the creation
                 of the first Arabic Sentiment Treebank (ArSenTB) that
                 is morphologically and orthographically enriched.
                 Experimental results show that, compared to the basic
                 RNTN proposed for English, our solution achieves
                 significant improvements up to 8\% absolute at the
                 phrase level and 10.8\% absolute at the sentence level,
                 measured by average F1 score. It also outperforms
                 well-known classifiers including Support Vector
                 Machines, Recursive Auto Encoders, and Long Short-Term
                 Memory by 7.6\%, 3.2\%, and 1.6\% absolute
                 respectively, all models being trained with similar
                 morphological considerations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Punchimudiyanse:2017:AFW,
  author = {Malinda Punchimudiyanse and
    Ravinda Gayan Narendra Meegama},
  title = {Animation of Fingerspelled Words and Number Signs of
    the {Sinhala} Sign Language},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {16},
  number = {4},
  articleno = {24},
  pages = {24:1--24:??},
  month = sep,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3092743},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Sign language is the primary communication medium of
    the aurally handicapped community. Often, a sign
    gesture is mapped to a word or a phrase in a spoken
    language and named as a conversational sign. A
    fingerspelling sign is a special sign derived to show a
    single character that matches a character in the
    alphabet of a given language. This enables the deaf
    community to express words that do not have a
    conversational sign, such as a name, using a
    letter-by-letter technique. Sinhala Sign Language (SSL)
    uses a phonetic pronunciation mechanism to decode such
    words due to the presence of one or more modifiers
    after a consonant. Expressing numbers also have a
    similar notation, and it is broken down into parts
    before interpretation in sign gestures. This article
    presents the variations implemented to make the 3D
    avatar-based interpreter system look similar to an
    actual fingerspelled SSL by a human interpreter. To
    accomplish the task, a phonetic English-based 3D avatar
    animation system is developed with Blender animation
    software. The conversion of Sinhala Unicode text to
    phonetic English and numbers written in digits to sign
    gestures is done with a Visual Basic.NET (VB.NET)
    application. The presented application has 61 SSL
    fingerspelling signs and 40 SSL number signs. It is
    capable of interpreting any word written using the
    modern Sinhala alphabet without conversational signs
    and interprets the numbers that go up to the billions.
    This is a helpful tool in teaching SSL fingerspelling
    and number signs of SSL to deaf children.},
}

@article{Al-Sallab:2017:ARD,
  author = {Ahmad Al-Sallab and Ramy Baly and Hazem Hajj and
    Khaled Bashir Shaban and Wassim El-Hajj and
    Gilbert Badaro},
  title = {{AROMA}: a Recursive Deep Learning Model for Opinion
    Mining in {Arabic} as a Low Resource Language},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {16},
  number = {4},
  articleno = {25},
  pages = {25:1--25:??},
  month = sep,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3086575},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {While research on English opinion mining has already
    achieved significant progress and success, work on
    Arabic opinion mining is still lagging. This is mainly
    due to the relative recency of research efforts in
    developing natural language processing (NLP) methods
    for Arabic, handling its morphological complexity, and
    the lack of large-scale opinion resources for Arabic.
    To close this gap, we examine the class of models used
    for English and that do not require extensive use of
    NLP or opinion resources. In particular, we consider
    the Recursive Auto Encoder (RAE). However, RAE models
    are not as successful in Arabic as they are in English,
    due to their limitations in handling the morphological
    complexity of Arabic, providing a more complete and
    comprehensive input features for the auto encoder, and
    performing semantic composition following the natural
    way constituents are combined to express the overall
    meaning. In this article, we propose A Recursive Deep
    Learning Model for Opinion Mining in Arabic (AROMA)
    that addresses these limitations. AROMA was evaluated
    on three Arabic corpora representing different genres
    and writing styles. Results show that AROMA achieved
    significant performance improvements compared to the
    baseline RAE. It also outperformed several well-known
    approaches in the literature.},
}

@article{Kong:2017:CSE,
  author = {Fang Kong and Guodong Zhou},
  title = {A {CDT}-Styled End-to-End {Chinese} Discourse Parser},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {16},
  number = {4},
  articleno = {26},
  pages = {26:1--26:??},
  month = sep,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3099557},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Discourse parsing is a challenging task and plays a
    critical role in discourse analysis. Since the release
    of the Rhetorical Structure Theory Discourse Treebank
    and the Penn Discourse Treebank, the research on
    English discourse parsing has attracted increasing
    attention and achieved considerable success in recent
    years. At the same time, some preliminary research on
    certain subtasks about discourse parsing for other
    languages, such as Chinese, has been conducted. In this
    article, we present an end-to-end Chinese discourse
    parser with the Connective-Driven Dependency Tree
    scheme, which consists of multiple components in a
    pipeline architecture, such as the elementary discourse
    unit (EDU) detector, discourse relation recognizer,
    discourse parse tree generator, and attribution
    labeler. In particular, the attribution labeler
    determines two attributions (i.e., sense and centering)
    for every nonterminal node (i.e., discourse relation)
    in the discourse parse trees. Systematically, our
    parser detects all EDUs in a free text, generates the
    discourse parse tree in a bottom-up way, and determines
    the sense and centering attributions for all
    nonterminal nodes by traversing the discourse parse
    tree. Comprehensive evaluation on the Connective-Driven
    Dependency Treebank corpus from both component-wise and
    error-cascading perspectives is conducted to illustrate
    how each component performs in isolation, and how the
    pipeline performs with error propagation. Finally, it
    shows that our end-to-end Chinese discourse parser
    achieves an overall F1 score of 20\% with full
    automation.},
}

@article{Liu:2017:PAL,
  author = {Shih-Hung Liu and Kuan-Yu Chen and Yu-Lun Hsieh and
    Berlin Chen and Hsin-Min Wang and Hsu-Chun Yen and
    Wen-Lian Hsu},
  title = {A Position-Aware Language Modeling Framework for
    Extractive Broadcast News Speech Summarization},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {16},
  number = {4},
  articleno = {27},
  pages = {27:1--27:??},
  month = sep,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3099472},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Extractive summarization, a process that automatically
    picks exemplary sentences from a text (or spoken)
    document with the goal of concisely conveying key
    information therein, has seen a surge of attention from
    scholars and practitioners recently. Using a language
    modeling (LM) approach for sentence selection has been
    proven effective for performing unsupervised extractive
    summarization. However, one of the major difficulties
    facing the LM approach is to model sentences and
    estimate their parameters more accurately for each text
    (or spoken) document. We extend this line of research
    and make the following contributions in this work.
    First, we propose a position-aware language modeling
    framework using various granularities of
    position-specific information to better estimate the
    sentence models involved in the summarization process.
    Second, we explore disparate ways to integrate the
    positional cues into relevance models through a
    pseudo-relevance feedback procedure. Third, we
    extensively evaluate various models originated from our
    proposed framework and several well-established
    unsupervised methods. Empirical evaluation conducted on
    a broadcast news summarization task further
    demonstrates performance merits of the proposed
    summarization methods.},
}

@article{Phani:2017:SLA,
  author = {Shanta Phani and Shibamouli Lahiri and
    Arindam Biswas},
  title = {A Supervised Learning Approach for Authorship
    Attribution of {Bengali} Literary Texts},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {16},
  number = {4},
  articleno = {28},
  pages = {28:1--28:??},
  month = sep,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3099473},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Authorship Attribution is a long-standing problem in
    Natural Language Processing. Several statistical and
    computational methods have been used to find a solution
    to this problem. In this article, we have proposed
    methods to deal with the authorship attribution problem
    in Bengali. More specifically, we proposed a supervised
    framework consisting of lexical and shallow features
    and investigated the possibility of using
    topic-modeling-inspired features, to classify documents
    according to their authors. We have created a corpus
    from nearly all the literary works of three eminent
    Bengali authors, consisting of 3,000 disjoint samples.
    Our models showed better performance than the
    state-of-the-art, with more than 98\% test accuracy for
    the shallow features and 100\% test accuracy for the
    topic-based features. Further experiments with GloVe
    vectors [Pennington et al. 2014] showed comparable
    results, but flexible patterns based on content words
    and high-frequency words [Schwartz et al. 2013] failed
    to perform as well as expected.},
}

@Article{Passban:2017:TLR,
  author =       "Peyman Passban and Qun Liu and Andy Way",
  title =        "Translating Low-Resource Languages by Vocabulary
                 Adaptation from Close Counterparts",
  journal =      j-TALLIP,
  volume =       "16",
  number =       "4",
  pages =        "29:1--29:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3099556",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Some natural languages belong to the same family or
                 share similar syntactic and/or semantic regularities.
                 This property persuades researchers to share
                 computational models across languages and benefit from
                 high-quality models to boost existing low-performance
                 counterparts. In this article, we follow a similar
                 idea, whereby we develop statistical and neural machine
                 translation (MT) engines that are trained on one
                 language pair but are used to translate another
                 language. First we train a reliable model for a
                 high-resource language, and then we exploit
                 cross-lingual similarities and adapt the model to work
                 for a close language with almost zero resources. We
                 chose Turkish (Tr) and Azeri or Azerbaijani (Az) as the
                 proposed pair in our experiments. Azeri suffers from
                 lack of resources as there is almost no bilingual
                 corpus for this language. Via our techniques, we are
                 able to train an engine for the Az {$\to$} English (En)
                 direction, which is able to outperform all other
                 existing models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{S:2017:RMI,
  author = {Sreelekha S. and Pushpak Bhattacharyya},
  title = {Role of Morphology Injection in {SMT}: a Case Study
    from {Indian} Language Perspective},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {17},
  number = {1},
  articleno = {1},
  pages = {1:1--1:??},
  month = nov,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3129208},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Phrase-based Statistical Machine Translation (PBSMT)
    is commonly used for automatic translation. However,
    PBSMT runs into difficulty when either or both of the
    source and target languages are morphologically rich.
    Factored models are found to be useful for such cases,
    as they consider word as a vector of factors. These
    factors can contain any information about the surface
    word and use it while translating. The objective of the
    current work is to handle morphological inflections in
    Hindi, Marathi, and Malayalam using Factored
    translation models when translating from English.
    Statistical MT approaches face the problem of data
    sparsity when translating to a morphologically rich
    language. It is very unlikely for a parallel corpus to
    contain all morphological forms of words. We propose a
    solution to generate these unseen morphological forms
    and inject them into the original training corpus. We
    propose a simple and effective solution based on
    enriching the input with various morphological forms of
    words. We observe that morphology injection improves
    the quality of translation in terms of both adequacy
    and fluency. We verify this with experiments on three
    morphologically rich languages when translating from
    English. From the detailed evaluations, we observed an
    order of magnitude improvement in translation
    quality.},
}

@Article{Malik:2017:UNE,
  author =       "Muhammad Kamran Malik",
  title =        "{Urdu} Named Entity Recognition and Classification
                 System Using Artificial Neural Network",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "2:1--2:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3129290",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Named Entity Recognition and Classification (NERC) is
                 a process of identifying words and classifying them
                 into person names, location names, organization names,
                 and so on. In this article, we discuss the development
                 of an Urdu Named Entity (NE) corpus, called the
                 Kamran-PU-NE (KPU-NE) corpus, for three entity types,
                 that is, Person, Organization, and Location, and
                 marking the remaining tokens as Others (O). We use two
                 supervised learning algorithms, Hidden Markov Model
                 (HMM) and Artificial Neural Network (ANN), for the
                 development of the Urdu NERC system. We annotate the
                 652852-token corpus taken from 15 different genres with
                 a total of 44480 NEs. The inter-annotator agreement
                 between the two annotators in terms of Kappa {$\kappa$}
                 statistic is 73.41\%. With HMM, the highest recorded
                 precision, recall, and f-measure values are 55.98\%,
                 83.11\%, and 66.90\%, respectively, and with ANN, they
                 are 81.05\%, 87.54\%, and 84.17\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kim:2017:PEN,
  author =       "Hyun Kim and Hun-Young Jung and Hongseok Kwon and
                 Jong-Hyeok Lee and Seung-Hoon Na",
  title =        "Predictor--Estimator: Neural Quality Estimation Based
                 on Target Word Prediction for Machine Translation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "1",
  pages =        "3:1--3:??",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3109480",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Dec 23 10:06:06 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Recently, quality estimation has been attracting
                 increasing interest from machine translation
                 researchers, aiming at finding a good estimator for the
                 ``quality'' of machine translation output. The common
                 approach for quality estimation is to treat the problem
                 as a supervised regression/classification task using a
                 quality-annotated noisy parallel corpus, called quality
                 estimation data, as training data. However, the
                 available size of quality estimation data remains
                 small, due to the too-expensive cost of creating such
                 data. In addition, most conventional quality estimation
                 approaches rely on manually designed features to model
                 nonlinear relationships between feature vectors and
                 corresponding quality labels. To overcome these
                 problems, this article proposes a novel neural network
                 architecture for quality estimation task---called the
                 predictor-estimator---that considers word prediction as
                 an additional pre-task. The major component of the
                 proposed neural architecture is a word prediction model
                 based on a modified neural machine translation model---a
                 probabilistic model for predicting a target word
                 conditioned on all the other source and target
                 contexts. The underlying assumption is that the word
                 prediction model is highly related to quality
                 estimation models and is therefore able to transfer
                 useful knowledge to quality estimation tasks. Our
                 proposed quality estimation method sequentially trains
                 the following two types of neural models: (1)
                 Predictor: a neural word prediction model trained from
                 parallel corpora and (2) Estimator: a neural quality
                 estimation model trained from quality estimation data.
                 To transfer a word prediction task to a quality
                 estimation task, we generate quality estimation feature
                 vectors from the word prediction model and feed them
                 into the quality estimation model. The experimental
                 results on WMT15 and 16 quality estimation datasets
                 show that our proposed method has great potential in
                 the various sub-challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Almeman:2017:ABV,
  author = {Khalid Almeman},
  title = {Automatically Building {VoIP} Speech Parallel Corpora
    for {Arabic} Dialects},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {17},
  number = {1},
  articleno = {4},
  pages = {4:1--4:??},
  month = nov,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3132708},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article discusses the process of automatically
    building Arabic multi-dialect speech corpora using
    Voice over Internet Protocol (VoIP). The Asterisk
    framework was adopted to act as the main connection
    between the parties, for which two virtual machines
    were created: a sender and a receiver. The sender makes
    a VoIP call to the receiver using the Asterisk
    framework, while the receiver records the call
    automatically, a process that is repeated for all the
    audio files involved in the corpora. In this work, more
    than 67,000 automatic calls were made between the
    sender and receiver machines, generating VoIP Arabic
    corpora for four Arabic dialects. The resulting corpora
    can be considered the first Arabic VoIP parallel speech
    corpora and will be made freely available to
    researchers in Arabic NLP and speech recognition
    research.},
}

@article{Tran:2017:LRB,
  author = {Phuoc Tran and Dien Dinh and Tan Le and
    Long H. B. Nguyen},
  title = {Linguistic-Relationships-Based Approach for Improving
    Word Alignment},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  volume = {17},
  number = {1},
  articleno = {5},
  pages = {5:1--5:??},
  month = nov,
  year = {2017},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3133323},
  journal-url = {https://dl.acm.org/loi/tallip},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {The unsupervised word alignments (such as GIZA++) are
    widely used in the phrase-based statistical machine
    translation. The quality of the model is proportional
    to the size and the quality of the bilingual corpus.
    However, for low-resource language pairs such as
    Chinese and Vietnamese, a result of unsupervised word
    alignment sometimes is of low quality due to the sparse
    data. In addition, this model does not take advantage
    of the linguistic relationships to improve performance
    of word alignment. Chinese and Vietnamese have the same
    language type and have close linguistic relationships.
    In this article, we integrate the characteristics of
    linguistic relationships into the word alignment model
    to enhance the quality of Chinese-Vietnamese word
    alignment. These linguistic relationships are
    Sino-Vietnamese and content word. The experimental
    results showed that our method improved the performance
    of word alignment as well as the quality of machine
    translation.},
}

@article{Cheng:2017:ECC,
  author = {Xiyao Cheng and Ying Chen and Bixiao Cheng and
    Shoushan Li and Guodong Zhou},
  title = {An Emotion Cause Corpus for {Chinese} Microblogs with
    Multiple-User Structures},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2017},
  month = nov,
  volume = {17},
  number = {1},
  pages = {6:1--6:??},
  articleno = {6},
  doi = {https://doi.org/10.1145/3132684},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {A notably challenging problem in emotion analysis is
    recognizing the cause of an emotion. Although there
    have been a few studies on emotion cause detection,
    most of them work on news reports or a few of them
    focus on microblogs using a single-user structure
    (i.e., all texts in a microblog are written by the same
    user). In this article, we focus on emotion cause
    detection for Chinese microblogs using a multiple-user
    structure (i.e., texts in a microblog are successively
    written by several users). First, based on the fact
    that the causes of an emotion of a focused user may be
    provided by other users in a microblog with the
    multiple-user structure, we design an emotion cause
    annotation scheme which can deal with such a
    complicated case, and then provide an emotion cause
    corpus using the annotation scheme. Second, based on
    the analysis of the emotion cause corpus, we formalize
    two emotion cause detection tasks for microblogs
    (current-subtweet-based emotion cause detection and
    original-subtweet-based emotion cause detection).
    Furthermore, in order to examine the difficulty of the
    two emotion cause detection tasks and the contributions
    of texts written by different users in a microblog with
    the multiple-user structure, we choose two popular
    classification methods (SVM and LSTM) to do emotion
    cause detection. Our experiments show that the
    current-subtweet-based emotion cause detection is much
    more difficult than the original-subtweet-based emotion
    cause detection, and texts written by different users
    are very helpful for both emotion cause detection
    tasks. This study presents a pilot study of emotion
    cause detection which deals with Chinese microblogs
    using a complicated structure.}
}

@article{Sarma:2017:DAS,
  author = {Himangshu Sarma and Navanath Saharia and Utpal
    Sharma},
  title = {Development and Analysis of Speech Recognition Systems
    for {Assamese} Language Using {HTK}},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2017},
  month = nov,
  volume = {17},
  number = {1},
  pages = {7:1--7:??},
  articleno = {7},
  doi = {https://doi.org/10.1145/3137055},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Language analysis is very important for the native
    speaker to connect with the digital world. Assamese is
    a relatively unexplored language. In this report, we
    analyze different aspects of speech-to-text processing,
    starting from building a speech corpus, defining
    syllable rules, and finally developing a speech search
    engine of Assamese. We have collected about 20 hours of
    speech in three (viz., read, extempore, and
    conversation) modes and transcribed it. We also discuss
    some issues and challenges faced during development of
    the corpus. We have developed an automatic
    syllabification model with 11 rules for the Assamese
    language and found an accuracy of more than 95\% in our
    result. We found 12 different syllable patterns where 5
    are found most frequent. The maximum length of a
    syllable found is four letters. With the help of Hidden
    Markov Model Toolkit (HTK) 3.5, we used deep learning
    based neural network for our speech recognition model,
    where we obtained 78.05\% accuracy for automatic
    transcription of Assamese speech.}
}

@article{Bhattacharya:2017:COB,
  author = {Nilanjana Bhattacharya and Umapada Pal and Partha
    Pratim Roy},
  title = {Cleaning of Online {Bangla} Free-form Handwritten
    Text},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2017},
  month = nov,
  volume = {17},
  number = {1},
  pages = {8:1--8:??},
  articleno = {8},
  doi = {https://doi.org/10.1145/3145538},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Sat Dec 23 10:06:06 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {In the normal free-form handwritten text, repetition
    (repeated writing of the same stroke several times in
    the same place), over-writing, and crossing out are
    very common. In this article, we call the presence of
    these three types of writing as ``noise.'' Cleaning to
    extract useful text from such types of noisy text is an
    important task for robust recognition. To the best of
    our knowledge, no work has been reported on cleaning of
    such noise from online text in any scripts and hence,
    in this article, we propose an automatic text-cleaning
    approach for online handwriting recognition. Here, at
    first, crossing out noise with straight strike-through
    lines is detected using the straightness criteria of
    online strokes. Next, regions containing repetition,
    over-writing, and other types of crossing out are
    located using the positional information of the
    overlapping strokes. Stroke density, self-intersections
    of strokes etc. are computed from the strokes of
    located regions to predict the type of noise and this
    type of information is used as follows for their
    cleaning. For cleaning of crossing outs, all strokes of
    the crossing-out region are removed. For cleaning
    repetition and over-writing, strokes written earlier
    are removed, keeping the latest strokes. Finally,
    delayed strokes are properly arranged and word is
    passed to online recognizer. Though recognition of
    free-form handwriting is quite difficult, in this
    attempt, we obtained up to 70.71\% improvement in
    word-recognition accuracy after noise cleaning.}
}

@article{Nasution:2018:GCA,
  author = {Arbi Haza Nasution and Yohei Murakami and Toru
    Ishida},
  title = {A Generalized Constraint Approach to Bilingual
    Dictionary Induction for Low-Resource Language
    Families},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = feb,
  volume = {17},
  number = {2},
  pages = {9:1--9:??},
  articleno = {9},
  doi = {https://doi.org/10.1145/3138815},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {The lack or absence of parallel and comparable corpora
    makes bilingual lexicon extraction a difficult task for
    low-resource languages. The pivot language and cognate
    recognition approaches have been proven useful for
    inducing bilingual lexicons for such languages. We
    propose constraint-based bilingual lexicon induction
    for closely related languages by extending constraints
    from the recent pivot-based induction technique and
    further enabling multiple symmetry assumption cycle to
    reach many more cognates in the transgraph. We further
    identify cognate synonyms to obtain many-to-many
    translation pairs. This article utilizes four datasets:
    one Austronesian low-resource language and three
    Indo-European high-resource languages. We use three
    constraint-based methods from our previous work, the
    Inverse Consultation method and translation pairs
    generated from Cartesian product of input dictionaries
    as baselines. We evaluate our result using the metrics
    of precision, recall, and F-score. Our customizable
    approach allows the user to conduct cross validation to
    predict the optimal hyperparameters (cognate threshold
    and cognate synonym threshold) with various combination
    of heuristics and number of symmetry assumption cycles
    to gain the highest F-score. Our proposed methods have
    statistically significant improvement of precision and
    F-score compared to our previous constraint-based
    methods. The results show that our method demonstrates
    the potential to complement other bilingual dictionary
    creation methods like word alignment models using
    parallel corpora for high-resource languages while well
    handling low-resource languages.}
}

@article{Onyenwe:2018:BLR,
  author = {Ikechukwu E. Onyenwe and Mark Hepple and Uchechukwu
    Chinedu and Ignatius Ezeani},
  title = {A Basic Language Resource Kit Implementation for the
    {Igbo} {NLP} Project},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = feb,
  volume = {17},
  number = {2},
  pages = {10:1--10:??},
  articleno = {10},
  doi = {https://doi.org/10.1145/3146387},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Igbo, an African language with around 32 million
    speakers worldwide, is one of the many languages having
    few or none of the language processing resources needed
    for advanced language technology applications. In this
    article, we describe the approach taken to creating an
    initial set of resources for Igbo, including an
    electronic text corpus, a part-of-speech (POS) tagset,
    and a POS-tagged subcorpus. We discuss the approach
    taken in gathering texts, the preprocessing of these
    texts, and the development of the POS tagged corpus. We
    also discuss some of the problems encountered during
    corpus and tagset development and the solutions arrived
    at for these problems.}
}

@article{Jia:2018:IDP,
  author = {Yanyan Jia and Yansong Feng and Yuan Ye and Chao Lv
    and Chongde Shi and Dongyan Zhao},
  title = {Improved Discourse Parsing with Two-Step Neural
    Transition-Based Model},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = feb,
  volume = {17},
  number = {2},
  pages = {11:1--11:??},
  articleno = {11},
  doi = {https://doi.org/10.1145/3152537},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Discourse parsing aims to identify structures and
    relationships between different discourse units. Most
    existing approaches analyze a whole discourse at once,
    which often fails in distinguishing long-span relations
    and properly representing discourse units. In this
    article, we propose a novel parsing model to analyze
    discourse in a two-step fashion with different feature
    representations to characterize intra sentence and
    inter sentence discourse structures, respectively. Our
    model works in a transition-based framework and
    benefits from a stack long short-term memory neural
    network model. Experiments on benchmark tree banks show
    that our method outperforms traditional 1-step parsing
    methods in both English and Chinese.}
}

@article{Naili:2018:CSS,
  author = {Marwa Naili and Anja Habacha Chaibi and Henda {Hajjami
    Ben Ghezala}},
  title = {The Contribution of Stemming and Semantics in {Arabic}
    Topic Segmentation},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = feb,
  volume = {17},
  number = {2},
  pages = {12:1--12:??},
  articleno = {12},
  doi = {https://doi.org/10.1145/3152464},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Topic Segmentation is one of the pillars of Natural
    Language Processing. Yet there is a remarkable research
    gap in this field, as far as the Arabic language is
    concerned. The purpose of this article is to improve
    Arabic Topic Segmentation (ATS) by inquiring into two
    segmenters: ArabC99 and ArabTextTiling. This study is
    carried out on two independent levels: the
    pre-processing level and the segmentation level. These
    levels represent the basic steps of topic segmentation.
    On the pre-processing level, we examine the effect of
    using different Arabic stemming algorithms on ATS. We
    find out that Light10 is more appropriate for the
    pre-processing step. Based on this conclusion, we
    proceed to the second level by proposing two Arabic
    segmenters called ArabC99-LS-LSA and
    ArabTextTiling-LS-LSA. These latter use external
    semantic knowledge related to the Latent Semantic
    Analysis (LSA). Based on the evaluation results, we
    notice that LSA provides improvements in this field.
    Hence, the main outcome of this article emphasizes the
    multilevel improvement of ATS based on Light10 and
    LSA.}
}

@article{Fujita:2018:EPL,
  author = {Atsushi Fujita and Pierre Isabelle},
  title = {Expanding Paraphrase Lexicons by Exploiting
    Generalities},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = feb,
  volume = {17},
  number = {2},
  pages = {13:1--13:??},
  articleno = {13},
  doi = {https://doi.org/10.1145/3160488},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Techniques for generating and recognizing paraphrases,
    i.e., semantically equivalent expressions, play an
    important role in a wide range of natural language
    processing tasks. In the last decade, the task of
    automatic acquisition of subsentential paraphrases,
    i.e., words and phrases with (approximately) the same
    meaning, has been drawing much attention in the
    research community. The core problem is to obtain
    paraphrases of high quality in large quantity. This
    article presents a method for tackling this issue by
    systematically expanding an initial seed lexicon made
    up of high-quality paraphrases. This involves
    automatically capturing morpho-semantic and syntactic
    generalizations within the lexicon and using them to
    leverage the power of large-scale monolingual data.
    Given an input set of paraphrases, our method starts by
    inducing paraphrase patterns that constitute
    generalizations over corresponding pairs of lexical
    variants, such as ``amending'' and ``amendment,'' in a
    fully empirical way. It then searches large-scale
    monolingual data for new paraphrases matching those
    patterns. The results of our experiments on English,
    French, and Japanese demonstrate that our method
    manages to expand seed lexicons by a large multiple.
    Human evaluation based on paraphrase substitution tests
    reveals that the automatically acquired paraphrases are
    also of high quality.}
}

@article{Wang:2018:EEW,
  author = {Shaonan Wang and Jiajun Zhang and Chengqing Zong},
  title = {Empirical Exploring Word-Character Relationship for
    {Chinese} Sentence Representation},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = may,
  volume = {17},
  number = {3},
  pages = {14:1--14:??},
  articleno = {14},
  doi = {https://doi.org/10.1145/3156778},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article addresses the problem of learning
    compositional Chinese sentence representations, which
    represent the meaning of a sentence by composing the
    meanings of its constituent words. In contrast to
    English, a Chinese word is composed of characters,
    which contain rich semantic information. However, this
    information has not been fully exploited by existing
    methods. In this work, we introduce a novel, mixed
    character-word architecture to improve the Chinese
    sentence representations by utilizing rich semantic
    information of inner-word characters. We propose two
    novel strategies to reach this purpose. The first one
    is to use a mask gate on characters, learning the
    relation among characters in a word. The second one is
    to use a max-pooling operation on words to adaptively
    find the optimal mixture of the atomic and
    compositional word representations. Finally, the
    proposed architecture is applied to various sentence
    composition models, which achieves substantial
    performance gains over baseline models on sentence
    similarity task. To further verify the generalization
    ability of our model, we employ the learned sentence
    representations as features in sentence classification
    task, question classification task, and sentence
    entailment task. Results have shown that the proposed
    mixed character-word sentence representation models
    outperform both the character-based and word-based
    models.}
}

%%% NOTE: In the author list below, ``E'' is the second author's
%%% complete family name (Shijia E), not an abbreviated initial, so it
%%% must not be followed by a period.
@Article{Jia:2018:COR,
  author =       "Shengbin Jia and Shijia E and Maozhen Li and Yang
                 Xiang",
  title =        "{Chinese} Open Relation Extraction and Knowledge Base
                 Establishment",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "3",
  pages =        "15:1--15:??",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162077",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Named entity relation extraction is an important
                 subject in the field of information extraction.
                 Although many English extractors have achieved
                 reasonable performance, an effective system for Chinese
                 relation extraction remains undeveloped due to the lack
                 of Chinese annotation corpora and the specificity of
                 Chinese linguistics. Here, we summarize three kinds of
                 unique but common phenomena in Chinese linguistics. In
                 this article, we investigate unsupervised
                 linguistics-based Chinese open relation extraction
                 (ORE), which can automatically discover arbitrary
                 relations without any manually labeled datasets, and
                 research the establishment of a large-scale corpus. By
                 mapping the entity relations into dependency-trees and
                 considering the unique Chinese linguistic
                 characteristics, we propose a novel unsupervised
                 Chinese ORE model based on Dependency Semantic Normal
                 Forms (DSNFs). This model imposes no restrictions on
                 the relative positions among entities and relationships
                 and achieves a high yield by extracting relations
                 mediated by verbs or nouns and processing the parallel
                 clauses. Empirical results from our model demonstrate
                 the effectiveness of this method, which obtains stable
                 performance on four heterogeneous datasets and achieves
                 better precision and recall in comparison with several
                 Chinese ORE systems. Furthermore, a large-scale
                 knowledge base of entity and relation, called COER, is
                 established and published by applying our method to web
                 text, which conquers the trouble of lack of Chinese
                 corpora.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Marie:2018:PTI,
  author = {Benjamin Marie and Atsushi Fujita},
  title = {Phrase Table Induction Using Monolingual Data for
    Low-Resource Statistical Machine Translation},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = may,
  volume = {17},
  number = {3},
  pages = {16:1--16:??},
  articleno = {16},
  doi = {https://doi.org/10.1145/3168054},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {We propose a new method for inducing a phrase-based
    translation model from a pair of unrelated monolingual
    corpora. Our method is able to deal with phrases of
    arbitrary length and to find phrase pairs that are
    useful for statistical machine translation, without
    requiring large parallel or comparable corpora. First,
    our method generates phrase pairs through coupling
    source and target phrases separately collected from
    respective monolingual data. Then, for each phrase
    pair, we compute features using the monolingual data
    and a small quantity of parallel sentences. Finally,
    incorrect phrase pairs are pruned, and a phrase table
    is made using the remaining phrase pairs. In our
    experiments on French--Japanese and Spanish--Japanese
    translation tasks under low-resource conditions, we
    observe that incorporating a phrase table induced by
    our method to the machine translation system leads to
    large improvements in translation quality. Furthermore,
    we show that a phrase table induced by our method can
    also be useful in a wide range of configurations,
    including configurations where we have already access
    to large parallel corpora and configurations where only
    small monolingual corpora are available.}
}

@article{Salami:2018:ISS,
  author = {Shahram Salami and Mehrnoush Shamsfard},
  title = {Integrating Shallow Syntactic Labels in the
    Phrase-Boundary Translation Model},
  journal = j-TALLIP,
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url = {https://dl.acm.org/loi/tallip},
  year = {2018},
  month = may,
  volume = {17},
  number = {3},
  pages = {17:1--17:??},
  articleno = {17},
  doi = {https://doi.org/10.1145/3178460},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  coden = {????},
  bibdate = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract = {Using a novel rule labeling method, this article
    proposes a hierarchical model for statistical machine
    translation. The proposed model labels translation
    rules by matching the boundaries of target side phrases
    with the shallow syntactic labels including POS tags
    and chunk labels on the target side of the training
    corpus. The boundary labels are concatenated if there
    is no label for the whole target span. Labeling with
    the classes of boundary words on the target side
    phrases has been previously proposed as a
    phrase-boundary model which can be considered as the
    base form of our model. In the extended model, the
    labeler uses a POS tag if there is no chunk label in
    one boundary. Using chunks as phrase labels, the
    proposed model generalizes the rules to decrease the
    model sparseness. The sparseness is a more important
    issue in the language pairs with a lot of differences
    in the word order because they have less number of
    aligned phrase pairs for extraction of rules. The
    extended phrase-boundary model is also applicable for
    low-resource languages having no syntactic parser. Some
    experiments are performed with the proposed model, the
    base phrase-boundary model, and variants of Syntax
    Augmented Machine Translation (SAMT) in translation
    from Persian and German to English as source and target
    languages with different word orders. According to the
    results, the proposed model improves the translation
    performance in the quality and decoding time aspects.
    Using BLEU as our metric, the proposed model has
    achieved a statistically significant improvement of
    about 0.5 point over the base phrase-boundary model.}
}

@Article{Sherkawi:2018:ASA,
  author =       {Lina Sherkawi and Nada Ghneim and Oumayma {Al
                 Dakkak}},
  title =        {{Arabic} Speech Act Recognition Techniques},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {18:1--18:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3170576},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {This article presents rule-based and statistical-based
                 techniques for Arabic speech act recognition. The
                 proposed techniques classify an utterance into Arabic
                 speech act categories based on three criteria: surface
                 features, cue words, and contextual information. A
                 rule-based expert system has been developed in a
                 bootstrapping manner based on the fact that Arabic
                 language syntax is inherently rule-based. Various
                 machine-learning algorithms have been used to detect
                 Arabic speech act categories: Decision Tree, Na{\"\i}ve
                 Bayes, Neural Network, and SVM. We compare the
                 experimental results for both techniques
                 (machine-learning and rule-based expert systems). Using
                 a corpus of 1,500 sentences, the rule-based expert
                 system achieved an accuracy rate of 98.92\%, while the
                 Decision Tree, Na{\"\i}ve Bayes, Neural Network, and
                 SVM achieved an accuracy rate of 97.09\%, 96.48\%,
                 93.50\%, and 93.70\%, respectively.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {18},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Jung:2018:EEK,
  author =       {Sangkeun Jung and Changki Lee and Hyunsun Hwang},
  title =        {End-to-End {Korean} Part-of-Speech Tagging Using
                 Copying Mechanism},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {19:1--19:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3178458},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {In this article, we introduce a novel neural
                 architecture for the end-to-end Korean Part-of-Speech
                 (POS) tagging problem. To address the problem, we
                 extend the present recurrent neural network-based
                 sequence-to-sequence models to deal with the key
                 challenges in this task: rare word generation and POS
                 tagging. To overcome these issues, Input-Feeding and
                 Copying mechanism are adopted. Although our approach
                 does not require any manual features or preprocessed
                 pattern matching dictionaries, our best single model
                 achieves an F-score of 97.08. This is competitive with
                 the current state-of-the-art model (F-score 98.03),
                 which requires extensive manual feature processing.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {19},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Sen:2018:AST,
  author =       {Shibaprasad Sen and Ankan Bhattacharyya and Pawan
                 Kumar Singh and Ram Sarkar and Kaushik Roy and David
                 Doermann},
  title =        {Application of Structural and Topological Features to
                 Recognize Online Handwritten {Bangla} Characters},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {20:1--20:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3178457},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {This article presents a set of novel features for
                 robust online Bangla handwritten character recognition.
                 Two feature extraction methods are presented here. The
                 first describes the transition from background to
                 foreground pixels and vice versa. The second uses a
                 combination of topological features and
                 centre-of-gravity- (CG) based circular features where
                 global information, local information, and Circular
                 Quadrant Mass Distribution information have been
                 extracted. The impact of each along with their
                 combination have also been analyzed. A total of 15,000
                 isolated online Bangla character samples have been
                 collected and used for the evaluation. A Support Vector
                 Machine classifier records the best recognition rate
                 when the transition count feature, CG-based circular
                 features, and topological features are combined.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {20},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{She:2018:LHD,
  author =       {Xiaohan She and Ping Jian and Pengcheng Zhang and
                 Heyan Huang},
  title =        {Leveraging Hierarchical Deep Semantics to Classify
                 Implicit Discourse Relations via a Mutual Learning
                 Method},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {21:1--21:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3178456},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {This article presents a mutual learning method using
                 hierarchical deep semantics for the classification of
                 implicit discourse relations in English. With the
                 absence of explicit discourse markers, traditional
                 discourse techniques mainly concentrate on discrete
                 linguistic features in this task, which always leads to
                 a data sparseness problem. To relieve this problem, we
                 propose a mutual learning neural model that makes use
                 of multilevel semantic information together, including
                 the distribution of implicit discourse relations, the
                 semantics of arguments, and the co-occurrence of
                 phrases and words. During the training process, the
                 predicting targets of the model, which are the
                 probability of the discourse relation type and the
                 distributed representation of semantic components, are
                 learned jointly and optimized mutually. The
                 experimental results show that this method outperforms
                 the previous works, especially in multiclass
                 identification attributed to the hierarchical semantic
                 representations and the mutual learning strategy.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {21},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Mohamed:2018:MSP,
  author =       {Emad Mohamed},
  title =        {Morphological Segmentation and Part-of-Speech Tagging
                 for the {Arabic} Heritage},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {22:1--22:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3178459},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {We annotate 60,000 words of Classical Arabic (CA) with
                 topics in philosophy, religion, literature, and law
                 with fine-grain segment-based morphological
                 descriptions. We use these annotations for building a
                 morphological segmenter and part-of-speech (POS) tagger
                 for CA. With character-level classification and
                 features from the word and its lexical context, the
                 segmenter achieves a word accuracy of 96.8\% with the
                 main issue being a high rate of out-of-vocabulary
                 words. A token-based POS tagger achieves an accuracy of
                 96.22\% with 97.72\% on known tokens despite the small
                 size of the corpus. An error analysis shows that most
                 of the tagging errors are results of segmentation and
                 that quality improves with more data being added. The
                 morphological segmenter and tagger have a wide range of
                 potential applications in processing CA, a low-resource
                 variety of the language.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {22},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Huang:2018:IPK,
  author =       {Degen Huang and Jiahuan Pei and Cong Zhang and Kaiyu
                 Huang and Jianjun Ma},
  title =        {Incorporating Prior Knowledge into Word Embedding for
                 {Chinese} Word Similarity Measurement},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {23:1--23:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3182622},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {Word embedding-based methods have received increasing
                 attention for their flexibility and effectiveness in
                 many natural language-processing (NLP) tasks, including
                 Word Similarity (WS). However, these approaches rely on
                 high-quality corpus and neglect prior knowledge.
                 Lexicon-based methods concentrate on human's
                 intelligence contained in semantic resources, e.g.,
                 Tongyici Cilin, HowNet, and Chinese WordNet, but they
                 have the drawback of being unable to deal with unknown
                 words. This article proposes a three-stage framework
                 for measuring the Chinese word similarity by
                 incorporating prior knowledge obtained from lexicons
                 and statistics into word embedding: in the first stage,
                 we utilize retrieval techniques to crawl the contexts
                 of word pairs from web resources to extend context
                 corpus. In the next stage, we investigate three types
                 of single similarity measurements, including lexicon
                 similarities, statistical similarities, and
                 embedding-based similarities. Finally, we exploit
                 simple combination strategies with math operations and
                 the counter-fitting combination strategy using
                 optimization method. To demonstrate our system's
                 efficiency, comparable experiments are conducted on the
                 PKU-500 dataset. Our final results are 0.561/0.516 of
                 Spearman/Pearson rank correlation coefficient, which
                 outperform the state-of-the-art performance to the best
                 of our knowledge. Experiment results on Chinese MC-30
                 and SemEval-2012 datasets show that our system also
                 performs well on other Chinese datasets, which proves
                 its transferability. Besides, our system is not
                 language-specific and can be applied to other
                 languages, e.g., English.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {23},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Ehsani:2018:CWT,
  author =       {Razieh Ehsani and Ercan Solak and Olcay Taner Yildiz},
  title =        {Constructing a {WordNet} for {Turkish} Using Manual
                 and Automatic Annotation},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {24:1--24:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3185664},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {In this article, we summarize the methodology and the
                 results of our 2-year-long efforts to construct a
                 comprehensive WordNet for Turkish. In our approach, we
                 mine a dictionary for synonym candidate pairs and
                 manually mark the senses in which the candidates are
                 synonymous. We marked every pair twice by different
                 human annotators. We derive the synsets by finding the
                 connected components of the graph whose edges are
                 synonym senses. We also mined Turkish Wikipedia for
                 hypernym relations among the senses. We analyzed the
                 resulting WordNet to highlight the difficulties brought
                 about by the dictionary construction methods of
                 lexicographers. After splitting the unusually large
                 synsets, we used random walk-based clustering that
                 resulted in a Zipfian distribution of synset sizes. We
                 compared our results to BalkaNet and automatic
                 thesaurus construction methods using variation of
                 information metric. Our Turkish WordNet is available
                 online.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {24},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Huang:2018:LRR,
  author =       {Jizhou Huang and Shiqiang Ding and Haifeng Wang and
                 Ting Liu},
  title =        {Learning to Recommend Related Entities With
                 Serendipity for {Web} Search Users},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {3},
  pages =        {25:1--25:??},
  month =        may,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3185663},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {Entity recommendation, providing entity suggestions to
                 assist users in discovering interesting information,
                 has become an indispensable feature of today's Web
                 search engine. However, the majority of existing entity
                 recommendation methods are not designed to boost the
                 performance in terms of serendipity, which also plays
                 an important role in the appreciation of users for a
                 recommendation system. To keep users engaged, it is
                 important to take into account serendipity when
                 building an entity recommendation system. In this
                 article, we propose a learning to recommend framework
                 that consists of two components: related entity finding
                 and candidate entity ranking. To boost serendipity
                 performance, three different sets of features that
                 correlate with the three aspects of serendipity are
                 employed in the proposed framework. Extensive
                 experiments are conducted on large-scale, real-world
                 datasets collected from a widely used commercial Web
                 search engine. The experiments show that our method
                 significantly outperforms several strong baseline
                 methods. An analysis on the impact of features reveals
                 that the set of interestingness features is the most
                 powerful feature set, and the set of unexpectedness
                 features can significantly contribute to recommendation
                 effectiveness. In addition, online controlled
                 experiments conducted on a commercial Web search engine
                 demonstrate that our method can significantly improve
                 user engagement against multiple baseline methods. This
                 further confirms the effectiveness of the proposed
                 framework.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {25},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Basiri:2018:WII,
  author =       {Mohammad Ehsan Basiri and Arman Kabiri},
  title =        {Words Are Important: Improving Sentiment Analysis in
                 the {Persian} Language by Lexicon Refining},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {4},
  pages =        {26:1--26:??},
  month =        aug,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3195633},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {Lexicon-based sentiment analysis (SA) aims to address
                 the problem of extracting people's opinions from their
                 comments on the Web using a predefined lexicon of
                 opinionated words. In contrast to the machine learning
                 (ML) approach, lexicon-based methods are
                 domain-independent methods that do not need a large
                 annotated training corpus and hence are faster. This
                 makes the lexicon-based approach prevalent in the SA
                 community. However, the story is different for the
                 Persian language. In contrast to English, using the
                 lexicon-based method in Persian is a new discipline.
                 There are rather limited resources available for SA in
                 Persian, making the accuracy of the existing
                 lexicon-based methods lower than other languages. In
                 the current study, first an exhaustive investigation of
                 the lexicon-based method is performed. Then two new
                 resources are introduced to address the problem of
                 resource scarcity for SA in Persian: a carefully
                 labeled lexicon of sentiment words, PerLex, and a new
                 handmade dataset of about 16,000 rated documents,
                 PerView. Moreover, a new hybrid method using both ML
                 and the lexicon-based approach is presented in which
                 PerLex words are used to train the ML algorithm.
                 Experiments are carried out on our new PerView dataset.
                 Results indicate that the accuracy of PerLex is higher
                 than the existing CNRC, Adjectives, SentiStrength,
                 PerSent, and LexiPers lexicons. In addition, the
                 results show that using PerLex significantly decreases
                 the execution time of the proposed system in comparison
                 to the above-mentioned lexicons. Moreover, the results
                 demonstrate the excellence of using opinionated lexicon
                 terms followed by bigrams as the features employed in
                 the ML method.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {26},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Suryani:2018:RBS,
  author =       "Arie Ardiyanti Suryani and Dwi Hendratmo Widyantoro
                 and Ayu Purwarianti and Yayat Sudaryat",
  title =        "The Rule-Based {Sundanese} Stemmer",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "4",
  pages =        "27:1--27:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3195634",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Our research proposed an iterative Sundanese stemmer
                 by removing the derivational affixes prior to the
                 inflexional. This scheme was chosen because, in the
                 Sundanese affixation, a confix (one of derivational
                 affix) is applied in the last phase of a morphological
                 process. Moreover, most of Sundanese affixes are
                 derivational, so removing the derivational affix as the
                 first step is reasonable. To handle ambiguity, the last
                 recognized affix was returned as the result. As the
                 baseline, a Confix-Stripping Approach that applies
                 Porter Stemmer for the Indonesian language was used.
                 This stemmer shares similarities in terms of affix
                 type, but uses a different stemming order. To observe
                 whether the baseline stems the Sundanese affixed word
                 properly, some features that were not covered by the
                 baseline, such as the infix and allomorph removal, were
                 added. The evaluation was done using 4,453 unique
                 affixed words collected from Sundanese online
                 magazines. The experiment shows that, as a whole, our
                 stemmer outperforms the modified baseline in terms of
                 recognized affixed type accuracy and properly stemmed
                 affixed words. Our stemmer recognized 68.87\% of the
                 Sundanese affixed types and produced 96.79\% of the
                 correctly affixed words; the modified baseline resulted
                 in 21.70\% and 71.59\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{He:2018:DPS,
  author =       {Ruifang He and Yaru Wang and Dawei Song and Peng Zhang
                 and Yuan Jia and Aijun Li},
  title =        {A Dependency Parser for Spontaneous {Chinese} Spoken
                 Language},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {4},
  pages =        {28:1--28:??},
  month =        aug,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3196278},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {Dependency analysis is vital for spoken language
                 understanding in spoken dialogue systems. However,
                 existing research has mainly focused on western spoken
                 languages, Japanese, and so on. Little research has
                 been done for spoken Chinese in terms of dependency
                 parsing. Therefore, the new spoken corpus, D-ESCSC
                 (Dependency-Expressive Speech Corpus of Standard
                 Chinese) is built by adding new dependency relations
                 special to spoken Chinese based on a written Chinese
                 annotation scheme. Since spoken Chinese contains
                 typical ill-grammatical phenomena, e.g., translocation,
                 repetition, duplication, and omission, the new atom
                 feature related to punctuation and three feature
                 templates are proposed to improve a graph-based
                 dependency parser. Experimental results on spoken
                 Chinese corpus show that the atom feature and three
                 templates really work and the new parser outperforms
                 the baseline parser. To our best knowledge, it is the
                 first work to report dependency parsing results of
                 spoken Chinese.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {28},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Bai:2018:IVS,
  author =       {Xuefeng Bai and Hailong Cao and Tiejun Zhao},
  title =        {Improving Vector Space Word Representations Via Kernel
                 Canonical Correlation Analysis},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {4},
  pages =        {29:1--29:??},
  month =        aug,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3197566},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {Cross-lingual word embeddings are representations for
                 vocabularies of two or more languages in one common
                 continuous vector space and are widely used in various
                 natural language processing tasks. A state-of-the-art
                 way to generate cross-lingual word embeddings is to
                 learn a linear mapping, with an assumption that the
                 vector representations of similar words in different
                 languages are related by a linear relationship.
                 However, this assumption does not always hold true,
                 especially for substantially different languages. We
                 therefore propose to use kernel canonical correlation
                 analysis to capture a non-linear relationship between
                 word embeddings of two languages. By extensively
                 evaluating the learned word embeddings on three tasks
                 (word similarity, cross-lingual dictionary induction,
                 and cross-lingual document classification) across five
                 language pairs, we demonstrate that our proposed
                 approach achieves essentially better performances than
                 previous linear methods on all of the three tasks,
                 especially for language pairs with substantial
                 typological difference.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {29},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Park:2018:NCI,
  author =       {Taekeun Park and Seung-Hoon Kim},
  title =        {Novel Character Identification Utilizing Semantic
                 Relation with Animate Nouns in {Korean}},
  journal =      j-TALLIP,
  volume =       {17},
  number =       {4},
  pages =        {30:1--30:??},
  month =        aug,
  year =         {2018},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3197657},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed Oct 2 10:34:31 MDT 2019},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  abstract =     {For identifying speakers of quoted speech or
                 extracting social networks from literature, it is
                 indispensable to extract character names and nominals.
                 However, detecting proper nouns in the novels
                 translated into or written in Korean is harder than in
                 English because Korean does not have a capitalization
                 feature. In addition, it is almost impossible for any
                 proper noun dictionary to include all kinds of
                 character names that have been created or will be
                 created by authors. Fortunately, a previous study shows
                 that utilizing postpositions for animate nouns is a
                 simple and effective tool for character identification
                 in Korean novels without a proper noun dictionary and a
                 training corpus. In this article, we propose a
                 character identification method utilizing the semantic
                 relation with known animate nouns. For 80 novels in
                 Korean, the proposed method increases the micro- and
                 macro-average recall by 13.68\% and 11.86\%,
                 respectively, while decreasing the micro-average
                 precision by 0.28\% and increasing the macro-average
                 precision by 0.07\% compared to the previous study. If
                 we focus on characters that are responsible for more
                 than 1\% of the character name mentions in each novel,
                 the micro- and macro-average F-measure of the proposed
                 method are 96.98\% and 97.32\%, respectively.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {30},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

%%% TALLIP volume 17, number 4 (August 2018), article 31.
@article{Wang:2018:GBB,
  author          = {Rui Wang and Hai Zhao and Sabine Ploux and Bao-Liang
                     Lu and Masao Utiyama and Eiichiro Sumita},
  title           = {Graph-Based Bilingual Word Embedding for Statistical
                     Machine Translation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {17},
  number          = {4},
  articleno       = {31},
  pages           = {31:1--31:??},
  month           = aug,
  year            = {2018},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3203078},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  abstract        = {Bilingual word embedding has been shown to be helpful
                     for Statistical Machine Translation (SMT). However,
                     most existing methods suffer from two obvious
                     drawbacks. First, they only focus on simple contexts
                     such as an entire document or a fixed-sized sliding
                     window to build word embedding and ignore latent useful
                     information from the selected context. Second, the word
                     sense but not the word should be the minimal semantic
                     unit; however, most existing methods still use word
                     representation. To overcome these drawbacks, this
                     article presents a novel Graph-Based Bilingual Word
                     Embedding (GBWE) method that projects bilingual word
                     senses into a multidimensional semantic space. First, a
                     bilingual word co-occurrence graph is constructed using
                     the co-occurrence and pointwise mutual information
                     between the words. Then, maximum complete subgraphs
                     (cliques), which play the role of a minimal unit for
                     bilingual sense representation, are dynamically
                     extracted according to the contextual information.
                     Consequently, correspondence analysis, principal
                     component analyses, and neural networks are used to
                     summarize the clique-word matrix into lower dimensions
                     to build the embedding model. Without contextual
                     information, the proposed GBWE can be applied to
                     lexical translation. In addition, given contextual
                     information, GBWE is able to give a dynamic solution
                     for bilingual word representations, which can be
                     applied to phrase translation and generation. Empirical
                     results show that GBWE can enhance the performance of
                     lexical translation, as well as
                     Chinese/French-to-English and Chinese-to-Japanese
                     phrase-based SMT tasks (IWSLT, NTCIR, NIST, and WAT).},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 17, number 4 (August 2018), article 32.
@article{Hamdi:2018:CCS,
  author          = {Ali Hamdi and Khaled Shaban and Anazida Zainal},
  title           = {{CLASENTI}: a Class-Specific Sentiment Analysis
                     Framework},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {17},
  number          = {4},
  articleno       = {32},
  pages           = {32:1--32:??},
  month           = aug,
  year            = {2018},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3209885},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  abstract        = {Arabic text sentiment analysis suffers from low
                     accuracy due to Arabic-specific challenges (e.g.,
                     limited resources, morphological complexity, and
                     dialects) and general linguistic issues (e.g.,
                     fuzziness, implicit sentiment, sarcasm, and spam). The
                     limited resources problem requires efforts to build new
                     and improved Arabic corpora and lexica. We propose a
                     class-specific sentiment analysis (CLASENTI) framework.
                     The framework includes a new annotation approach to
                     build multi-faceted Arabic corpus and lexicon allowing
                     for simultaneous annotation of different facets,
                     including domains, dialects, linguistic issues, and
                     polarity strengths. Each of these facets has multiple
                     classes (e.g., the nine classes representing dialects
                     found in the Arab world). The new corpus and lexicon
                     annotations facilitate the development of new
                     class-specific classification models and polarity
                     strength calculation. For the new sentiment
                     classification models, we propose a hybrid model
                     combining corpus-based and lexicon-based models. The
                     corpus-based model has two interrelated phases to
                     build; (1) full-corpus classification models for all
                     facets; and (2) class-specific models trained on
                     filtered subsets of the corpus according to the
                     performances of the full-corpus models. To calculate
                     polarity strengths, the lexicon-based model filters the
                     annotated lexicon based on the specific classes of the
                     domain and dialect. As a case study, we collect and
                     annotate 15274 reviews from various sources, including
                     surveys, Facebook comments, and Twitter posts,
                     pertaining to governmental services. In addition, we
                     develop a new web-based application to apply the
                     proposed framework on the case study. CLASENTI
                     framework reaches up to 95\% accuracy and 93\% F1-Score
                     surpassing the best-known sentiment classifiers
                     implemented in Scikit-learn library that achieve 82\%
                     accuracy and 81\% F1-Score for Arabic when tested on
                     the same dataset.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 17, number 4 (August 2018), article 33.
%%% The title is stored in title case throughout (both hyphenated
%%% compounds capitalized); bibliography styles downcase it as needed.
@Article{Wang:2018:DSN,
  author =       "Limin Wang and Shoushan Li and Qian Yan and Guodong
                 Zhou",
  title =        "Domain-Specific Named Entity Recognition with
                 Document-Level Optimization",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "4",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3213544",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Previous studies normally formulate named entity
                 recognition (NER) as a sequence labeling task and
                 optimize the solution in the sentence level. In this
                 article, we propose a document-level optimization
                 approach to NER and apply it in a domain-specific
                 document-level NER task. As a baseline, we apply a
                 state-of-the-art approach, i.e., long-short-term memory
                 (LSTM), to perform word classification. On this basis,
                 we define a global objective function with the obtained
                 word classification results and achieve global
                 optimization via Integer Linear Programming (ILP).
                 Specifically, in the ILP-based approach, we propose
                 four kinds of constraints, i.e., label transition,
                 entity length, label consistency, and domain-specific
                 regulation constraints, to incorporate various entity
                 recognition knowledge in the document level. Empirical
                 studies demonstrate the effectiveness of the proposed
                 approach to domain-specific document-level NER.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 17, number 4 (August 2018), article 34.
@article{Komiya:2018:CMA,
  author          = {Kanako Komiya and Masaya Suzuki and Tomoya Iwakura and
                     Minoru Sasaki and Hiroyuki Shinnou},
  title           = {Comparison of Methods to Annotate Named Entity
                     Corpora},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {17},
  number          = {4},
  articleno       = {34},
  pages           = {34:1--34:??},
  month           = aug,
  year            = {2018},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3218820},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  abstract        = {The authors compared two methods for annotating a
                     corpus for the named entity (NE) recognition task using
                     non-expert annotators: (i) revising the results of an
                     existing NE recognizer and (ii) manually annotating the
                     NEs completely. The annotation time, degree of
                     agreement, and performance were evaluated based on the
                     gold standard. Because there were two annotators for
                     one text for each method, two performances were
                     evaluated: the average performance of both annotators
                     and the performance when at least one annotator is
                     correct. The experiments reveal that semi-automatic
                     annotation is faster, achieves better agreement, and
                     performs better on average. However, they also indicate
                     that sometimes, fully manual annotation should be used
                     for some texts whose document types are substantially
                     different from the training data document types. In
                     addition, the machine learning experiments using
                     semi-automatic and fully manually annotated corpora as
                     training data indicate that the F-measures could be
                     better for some texts when manual instead of
                     semi-automatic annotation was used. Finally,
                     experiments using the annotated corpora for training as
                     additional corpora show that (i) the NE recognition
                     performance does not always correspond to the
                     performance of the NE tag annotation and (ii) the
                     system trained with the manually annotated corpus
                     outperforms the system trained with the
                     semi-automatically annotated corpus with respect to
                     newswires, even though the existing NE recognizer was
                     mainly trained with newswires.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:31 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 17, number 4 (August 2018), article 35.
%%% In the abstract, the code length L and the coding-matrix value set
%%% are written in math mode; the set braces are escaped so that TeX
%%% prints them instead of treating them as a group.
@Article{Zhou:2018:WSP,
  author =       "Deyu Zhou and Zhikai Zhang and Min-Ling Zhang and
                 Yulan He",
  title =        "Weakly Supervised {POS} Tagging without
                 Disambiguation",
  journal =      j-TALLIP,
  volume =       "17",
  number =       "4",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3214707",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:31 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Weakly supervised part-of-speech (POS) tagging is to
                 learn to predict the POS tag for a given word in
                 context by making use of partial annotated data instead
                 of the fully tagged corpora. Weakly supervised POS
                 tagging would benefit various natural language
                 processing applications in such languages where tagged
                 corpora are mostly unavailable. In this article, we
                 propose a novel framework for weakly supervised POS
                 tagging based on a dictionary of words with their
                 possible POS tags. In the constrained error-correcting
                 output codes (ECOC)-based approach, a unique $L$-bit
                 vector is assigned to each POS tag. The set of
                 bitvectors is referred to as a coding matrix with value
                 $\{1, -1\}$. Each column of the coding matrix specifies a
                 dichotomy over the tag space to learn a binary
                 classifier. For each binary classifier, its training
                 data is generated in the following way: each pair of
                 words and its possible POS tags are considered as a
                 positive training example only if the whole set of its
                 possible tags falls into the positive dichotomy
                 specified by the column coding and similarly for
                 negative training examples. Given a word in context,
                 its POS tag is predicted by concatenating the
                 predictive outputs of the $L$ binary classifiers and
                 choosing the tag with the closest distance according to
                 some measure. By incorporating the ECOC strategy, the
                 set of all possible tags for each word is treated as an
                 entirety without the need of performing disambiguation.
                 Moreover, instead of manual feature engineering
                 employed in most previous POS tagging approaches,
                 features for training and testing in the proposed
                 framework are automatically generated using neural
                 language modeling. The proposed framework has been
                 evaluated on three corpora for English, Italian, and
                 Malagasy POS tagging, achieving accuracies of 93.21\%,
                 90.9\%, and 84.5\% individually, which shows a
                 significant improvement compared to the
                 state-of-the-art approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 18, number 1 (January 2019), article 1.
@article{Bhattacharya:2019:UCW,
  author          = {Paheli Bhattacharya and Pawan Goyal and Sudeshna
                     Sarkar},
  title           = {Using Communities of Words Derived from Multilingual
                     Word Vectors for Cross-Language Information Retrieval
                     in {Indian} Languages},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {18},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2019},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3208358},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3208358},
  abstract        = {We investigate the use of word embeddings for query
                     translation to improve precision in cross-language
                     information retrieval (CLIR). Word vectors represent
                     words in a distributional space such that syntactically
                     or semantically similar words are close to each other
                     in this space. Multilingual word embeddings are
                     constructed in such a way that similar words across
                     languages have similar vector representations. We
                     explore the effective use of bilingual and multilingual
                     word embeddings learned from comparable corpora of
                     Indic languages to the task of CLIR. We propose a
                     clustering method based on the multilingual word
                     vectors to group similar words across languages. For
                     this we construct a graph with words from multiple
                     languages as nodes and with edges connecting words with
                     similar vectors. We use the Louvain method for
                     community detection to find communities in this graph.
                     We show that choosing target language words as query
                     translations from the clusters or communities
                     containing the query terms helps in improving CLIR. We
                     also find that better-quality query translations are
                     obtained when words from more languages are used to do
                     the clustering even when the additional languages are
                     neither the source nor the target languages. This is
                     probably because having more similar words across
                     multiple languages helps define well-defined dense
                     subclusters that help us obtain precise query
                     translations. In this article, we demonstrate the use
                     of multilingual word embeddings and word clusters for
                     CLIR involving Indic languages. We also make available
                     a tool for obtaining related words and the
                     visualizations of the multilingual word vectors for
                     English, Hindi, Bengali, Marathi, Gujarati, and
                     Tamil.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 18, number 1 (January 2019), article 2.
@article{Li:2019:OAE,
  author          = {Maoxi Li and Mingwen Wang},
  title           = {Optimizing Automatic Evaluation of Machine Translation
                     with the {ListMLE} Approach},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {18},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2019},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3226045},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3226045},
  abstract        = {Automatic evaluation of machine translation is
                     critical for the evaluation and development of machine
                     translation systems. In this study, we propose a new
                     model for automatic evaluation of machine translation.
                     The proposed model combines standard n-gram precision
                     features and sentence semantic mapping features with
                     neural features, including neural language model
                     probabilities and the embedding distances between
                     translation outputs and their reference translations.
                     We optimize the model with a representative list-wise
                     learning to rank approach, ListMLE, in terms of human
                     ranking assessments. The experimental results on
                     WMT'2015 Metrics task indicated that the proposed
                     approach yields significantly better correlations with
                     human assessments than several state-of-the-art
                     baseline approaches. In particular, the results
                     confirmed that the proposed list-wise learning to rank
                     approach is useful and powerful for optimizing
                     automatic evaluation metrics in terms of human ranking
                     assessments. Deep analysis also demonstrated that
                     optimizing automatic metrics with the ListMLE approach
                     is a reasonable method and adding the neural features
                     can gain considerable improvements compared with the
                     traditional features.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 18, number 1 (January 2019), article 3.
@article{Su:2019:RSA,
  author          = {Ming-Hsiang Su and Chung-Hsien Wu and Kun-Yi Huang and
                     Wu-Hsuan Lin},
  title           = {Response Selection and Automatic Message-Response
                     Expansion in Retrieval-Based {QA} Systems using
                     Semantic Dependency Pair Model},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {18},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2019},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3229184},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3229184},
  abstract        = {This article presents an approach to response
                     selection and message-response (MR) database expansion
                     from the unstructured data on the psychological
                     consultation websites for a retrieval-based question
                     answering (QA) system in a constrained domain for
                     emotional support and comforting. First, we manually
                     construct an initial MR database based on the articles
                     collected from the psychological consultation websites.
                     The Chinese Knowledge and Information Processing
                     probabilistic context-free grammar is adopted to obtain
                     the semantic dependency graphs (SDGs) of all the
                     messages and responses in the initial MR database. For
                     each sentence in the MR database, all the semantic
                     dependencies, each composed of two words and their
                     semantic relation, are extracted from the SDG of the
                     sentence to form a semantic dependency set. Finally, a
                     matrix with the element representing the correlation
                     between the semantic dependencies of the messages and
                     their corresponding responses is constructed as a
                     semantic dependency pair model (SDPM) for response
                     selection. Moreover, as the number of MR pairs in the
                     psychological consultation websites is increasing day
                     by day, the MR database in the QA system should be
                     expanded to meet the needs of the users. For MR
                     database expansion, the unstructured data from the
                     message board are automatically collected. For the
                     collected data, the supervised latent Dirichlet
                     allocation is adopted for event detection and then the
                     event-based delta Bayesian Information Criterion is
                     used for message and response article segmentation.
                     Each extracted message segment is then fed to the
                     constructed retrieval-based QA system to find the best
                     matched response segment and the matching score is also
                     estimated to verify if the new MR pair is suitable to
                     be included in the expanded MR database. Fivefold cross
                     validation was employed to evaluate the performance of
                     the proposed retrieval-based QA system over the
                     expanded MR database based on SDPM. Compared to the
                     vector space model-based method, the Okapi BM25 model,
                     and the deep learning-based sequence-to-sequence with
                     attention model, the proposed approach achieved a more
                     favorable performance according to a statistical
                     significance test. The retrieval accuracy based on MR
                     expansion was also evaluated and a satisfactory result
                     was obtained confirming the effectiveness of the
                     expanded MR database. In addition, the user's
                     satisfaction score of the proposed system was evaluated
                     using the Cronbach's alpha value and the satisfaction
                     score of the proposed SDPM was higher than those of the
                     methods for comparison.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 18, number 1 (January 2019), article 4.
@article{Huang:2019:IMH,
  author          = {Guoping Huang and Jiajun Zhang and Yu Zhou and
                     Chengqing Zong},
  title           = {Input Method for Human Translators: a Novel Approach
                     to Integrate Machine Translation Effectively and
                     Imperceptibly},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-url     = {https://dl.acm.org/loi/tallip},
  volume          = {18},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  month           = jan,
  year            = {2019},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3230638},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3230638},
  abstract        = {Computer-aided translation (CAT) systems are the most
                     popular tool for helping human translators efficiently
                     perform language translation. To further improve the
                     translation efficiency, there is an increasing interest
                     in applying machine translation (MT) technology to
                     upgrade CAT. To thoroughly integrate MT into CAT
                     systems, in this article, we propose a novel approach:
                     a new input method that makes full use of the knowledge
                     adopted by MT systems, such as translation rules,
                     decoding hypotheses, and n-best translation lists. The
                     proposed input method contains two parts: a phrase
                     generation model, allowing human translators to type
                     target sentences quickly, and an n-gram prediction
                     model, helping users choose perfect MT fragments
                     smoothly. In addition, to tune the underlying MT system
                     to generate the input method preferable results, we
                     design a new evaluation metric for the MT system. The
                     proposed input method integrates MT effectively and
                     imperceptibly, and it is particularly suitable for many
                     target languages with complex characters, such as
                     Chinese and Japanese. The extensive experiments
                     demonstrate that our method saves more than 23\% in
                     time and over 42\% in keystrokes, and it also improves
                     the translation quality by more than 5 absolute BLEU
                     scores compared with the strong baseline, i.e.,
                     post-editing using Google Pinyin.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Oct 2 10:34:32 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 18, number 1 (January 2019), article 5.
%%% In the abstract, ``n-grams'' is written with a math-mode n joined
%%% directly to the hyphen (no intervening space).
@Article{Altakrori:2019:AAA,
  author =       "Malik H. Altakrori and Farkhund Iqbal and Benjamin C.
                 M. Fung and Steven H. H. Ding and Abdallah Tubaishat",
  title =        "{Arabic} Authorship Attribution: an Extensive Study on
                 {Twitter} Posts",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3236391",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3236391",
  abstract =     "Law enforcement faces problems in tracing the true
                 identity of offenders in cybercrime investigations.
                 Most offenders mask their true identity, impersonate
                 people of high authority, or use identity deception and
                 obfuscation tactics to avoid detection and
                 traceability. To address the problem of anonymity,
                 authorship analysis is used to identify individuals by
                 their writing styles without knowing their actual
                 identities. Most authorship studies are dedicated to
                 English due to its widespread use over the Internet,
                 but recent cyber-attacks such as the distribution of
                 Stuxnet indicate that Internet crimes are not limited
                 to a certain community, language, culture, ideology, or
                 ethnicity. To effectively investigate cybercrime and to
                 address the problem of anonymity in online
                 communication, there is a pressing need to study
                 authorship analysis of languages such as Arabic,
                 Chinese, Turkish, and so on. Arabic, the focus of this
                 study, is the fourth most widely used language on the
                 Internet. This study investigates authorship of Arabic
                 discourse/text, especially tiny text, Twitter posts. We
                 benchmark the performance of a profile-based approach
                 that uses $n$-grams as features and compare it with
                 state-of-the-art instance-based classification
                 techniques. Then we adapt an event-visualization tool
                 that is developed for English to accommodate both
                 Arabic and English languages and visualize the result
                 of the attribution evidence. In addition, we
                 investigate the relative effect of the training set,
                 the length of tweets, and the number of authors on
                 authorship classification accuracy. Finally, we show
                 that diacritics have an insignificant effect on the
                 attribution process and part-of-speech tags are less
                 effective than character-level and word-level
                 $n$-grams.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zhang:2019:WSB,
  author =       "Shaoning Zhang and Cunli Mao and Zhengtao Yu and
                 Hongbin Wang and Zhongwei Li and Jiafu Zhang",
  title =        "Word Segmentation for {Burmese} Based on Dual-Layer
                 {CRFs}",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3232537",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3232537",
  abstract =     "Burmese is an isolated language, in which the syllable
                 is the smallest unit. Syllable segmentation methods
                 based on matching lead to performance subject to the
                 syllable segmentation effect. This article proposes a
                 word segmentation method with fusion conditions of
                 double syllable features. It combines word segmentation
                 and segmentation of syllables into one process, thus
                 reducing the impact of errors on the syllable
                 segmentation of Burmese. In the first layer of the
                 conditional random fields (CRF) model, Burmese
                 characters as atomic features are integrated into the
                 Burma section of the Barkis Speech Paradigm (Backus
                 normal form) features to realize the Burma syllable
                 sequence tags. In the second layer of the CRFs model,
                 with the syllable marked as input, it realizes the
                 sequence markers through building a feature template
                 with syllables as atomic features. The experimental
                 results show that the proposed method has a better
                 effect compared with the method based on the matching
                 of syllables.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2019:IML,
  author =       "Junjie Li and Haoran Li and Xiaomian Kang and Haitong
                 Yang and Chengqing Zong",
  title =        "Incorporating Multi-Level User Preference into
                 Document-Level Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3234512",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3234512",
  abstract =     "Document-level sentiment classification aims to
                 predict a user's sentiment polarity in a document about
                 a product. Most existing methods only focus on review
                 contents and ignore users who post reviews. In fact,
                 when reviewing a product, different users have
                 different word-using habits to express opinions (i.e.,
                 word-level user preference), care about different
                 attributes of the product (i.e., aspect-level user
                 preference), and have different characteristics to
                 score the review (i.e., polarity-level user
                 preference). These preferences have great influence on
                 interpreting the sentiment of text. To address this
                 issue, we propose a model called Hierarchical User
                 Attention Network (HUAN), which incorporates
                 multi-level user preference into a hierarchical neural
                 network to perform document-level sentiment
                 classification. Specifically, HUAN encodes different
                 kinds of information (word, sentence, aspect, and
                 document) in a hierarchical structure and imports user
                 embedding and user attention mechanism to model these
                 preferences. Empirical results on two real-world
                 datasets show that HUAN achieves state-of-the-art
                 performance. Furthermore, HUAN can also mine important
                 attributes of products for different users.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jain:2019:UES,
  author =       "Amita Jain and Minni Jain and Goonjan Jain and
                 Devendra K. Tayal",
  title =        "{``UTTAM''}: an Efficient Spelling Correction System
                 for {Hindi} Language Based on Supervised Learning",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3264620",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3264620",
  abstract =     "In this article, we propose a system called ``UTTAM,''
                 for correcting spelling errors in Hindi language text
                 using supervised learning. Unlike other languages,
                 Hindi contains a large set of characters, words with
                 inflections and complex characters, phonetically
                 similar sets of characters, and so on. The complexity
                 increases the possibility of confusion and occasionally
                 leads to entering a wrong character in a word. The
                 existence of spelling errors in text significantly
                 decreases the accuracy of the available resources, like
                 search engine, text editor, and so on. The proposed
                 work is the first approach to correct non-word (Out of
                 Vocabulary) errors as well as real-word errors
                 simultaneously in a sentence of Hindi language. The
                 proposed method investigates the human behavior, i.e.,
                 the type and frequency of spelling errors done by
                 humans in Hindi text. Based on the type and frequency
                 of spelling errors, the heterogeneous data is collected
                 in matrices. This data in matrices is used to generate
                 the suitable candidate words for an input word. After
                 generating candidate words, the Viterbi algorithm is
                 applied to perform the word correction. The Viterbi
                 algorithm finds the best sequence of candidate words to
                 correct the input sentence. For Hindi, this work is the
                 first attempt for real-word error correction. For
                 non-word errors, the experiments show that ``UTTAM''
                 performs better than the existing systems SpellGuru and
                 Saksham.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Murthy:2019:INT,
  author =       "Rudra Murthy and Mitesh M. Khapra and Pushpak
                 Bhattacharyya",
  title =        "Improving {NER} Tagging Performance in Low-Resource
                 Languages via Multilingual Learning",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3238797",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3238797",
  abstract =     "Existing supervised solutions for Named Entity
                 Recognition (NER) typically rely on a large annotated
                 corpus. Collecting large amounts of NER annotated
                 corpus is time-consuming and requires considerable
                 human effort. However, collecting small amounts of
                 annotated corpus for any language is feasible, but the
                 performance degrades due to data sparsity. We address
                 the data sparsity by borrowing features from the data
                 of a closely related language. We use hierarchical
                 neural networks to train a supervised NER system. The
                 feature borrowing from a closely related language
                 happens via the shared layers of the network. The
                 neural network is trained on the combined dataset of
                 the low-resource language and a closely related
                 language, also termed Multilingual Learning. Unlike
                 existing systems, we share all layers of the network
                 between the two languages. We apply multilingual
                 learning for NER in Indian languages and empirically
                 show the benefits over a monolingual deep learning
                 system and a traditional machine-learning system with
                 some feature engineering. Using multilingual learning,
                 we show that the low-resource language NER performance
                 increases mainly due to (1) increased named entity
                 vocabulary, (2) cross-lingual subword features, and (3)
                 multilingual learning playing the role of
                 regularization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jarrar:2019:DBM,
  author =       "Mustafa Jarrar and Fadi Zaraket and Rami Asia and
                 Hamzeh Amayreh",
  title =        "Diacritic-Based Matching of {Arabic} Words",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3242177",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3242177",
  abstract =     "Words in Arabic consist of letters and short vowel
                 symbols called diacritics inscribed atop regular
                 letters. Changing diacritics may change the syntax and
                 semantics of a word; turning it into another. This
                 results in difficulties when comparing words based
                 solely on string matching. Typically, Arabic NLP
                 applications resort to morphological analysis to battle
                 ambiguity originating from this and other challenges.
                 In this article, we introduce three alternative
                 algorithms to compare two words with possibly different
                 diacritics. We propose the Subsume knowledge-based
                 algorithm, the Imply rule-based algorithm, and the
                 Alike machine-learning-based algorithm. We evaluated
                 the soundness, completeness, and accuracy of the
                 algorithms against a large dataset of 86,886 word
                 pairs. Our evaluation shows that the accuracy of
                 Subsume (100\%), Imply (99.32\%), and Alike (99.53\%).
                 Although accurate, Subsume was able to judge only 75\%
                 of the data. Both Subsume and Imply are sound, while
                 Alike is not. We demonstrate the utility of the
                 algorithms using a real-life use case --- in lemma
                 disambiguation and in linking hundreds of Arabic
                 dictionaries.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bhattacharya:2019:SSW,
  author =       "Nilanjana Bhattacharya and Partha Pratim Roy and
                 Umapada Pal",
  title =        "Sub-Stroke-Wise Relative Feature for Online {Indic}
                 Handwriting Recognition",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3264735",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3264735",
  abstract =     "The main problem of Bangla (Bengali) and Devanagari
                 handwriting recognition is the shape similarity of
                 characters. There are only a few pieces of work on
                 writer-independent cursive online Indian text
                 recognition, and the shape similarity problem needs
                 more attention from the researchers. To handle the
                 shape similarity problem of cursive characters of
                 Bangla and Devanagari scripts, in this article, we
                 propose a new category of features called
                 `sub-stroke-wise relative feature' (SRF) which are
                 based on relative information of the constituent parts
                 of the handwritten strokes. Relative information among
                 some of the parts within a character can be a
                 distinctive feature as it scales up small
                 dissimilarities and enhances discrimination among
                 similar-looking shapes. Also, contextual anticipatory
                 phenomena are automatically modeled by this type of
                 feature, as it takes into account the influence of
                 previous and forthcoming strokes. We have tested
                 popular state-of-the-art feature sets as well as
                 proposed SRF using various (up to 20,000-word) lexicons
                 and noticed that SRF significantly outperforms the
                 state-of-the-art feature sets for online Bangla and
                 Devanagari cursive word recognition.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mrinalini:2019:PBP,
  author =       "K. Mrinalini and T. Nagarajan and P. Vijayalakshmi",
  title =        "Pause-Based Phrase Extraction and Effective {OOV}
                 Handling for Low-Resource Machine Translation Systems",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3265751",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3265751",
  abstract =     "Machine translation is the core problem for several
                 natural language processing research across the globe.
                 However, building a translation system involving
                 low-resource languages remains a challenge with respect
                 to statistical machine translation (SMT). This work
                 proposes and studies the effect of a phrase-induced
                 hybrid machine translation system for translation from
                 English to Tamil, under a low-resource setting. Unlike
                 conventional hybrid MT systems, the free-word ordering
                 feature of the target language Tamil is exploited to
                 form a re-ordered target language model and to extend
                 the parallel text corpus for training the SMT. In the
                 current work, a novel rule-based phrase-extraction
                 method, implemented using parts-of-speech (POS) and
                 place-of-pause in both languages is proposed, which is
                 used to pre-process the training corpus for developing
                 the back-off phrase-induced SMT. Further,
                 out-of-vocabulary (OOV) words are handled using
                 speech-based transliteration and two-level thesaurus
                 intersection techniques based on the POS tag of the OOV
                 word. To ensure that the input with OOV words does not
                 skip phrase-level translation in the hierarchical
                 model, a phrase-level example-based machine translation
                 approach is adopted to find the closest matching phrase
                 and perform translation followed by OOV replacement.
                 The proposed system results in a bilingual evaluation
                 understudy score of 84.78 and a translation edit rate
                 of 19.12. The performance of the system is compared in
                 terms of adequacy and fluency, with existing
                 translation systems for this specific language pair,
                 and it is observed that the proposed system outperforms
                 its counterparts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Le:2019:LRM,
  author =       "Ngoc Tan Le and Fatiha Sadat and Lucie Menard and Dien
                 Dinh",
  title =        "Low-Resource Machine Transliteration Using Recurrent
                 Neural Networks",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3265752",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3265752",
  abstract =     "Grapheme-to-phoneme models are key components in
                 automatic speech recognition and text-to-speech
                 systems. With low-resource language pairs that do not
                 have available and well-developed pronunciation
                 lexicons, grapheme-to-phoneme models are particularly
                 useful. These models are based on initial alignments
                 between grapheme source and phoneme target sequences.
                 Inspired by sequence-to-sequence recurrent neural
                 network--based translation methods, the current
                 research presents an approach that applies an alignment
                 representation for input sequences and pretrained
                 source and target embeddings to overcome the
                 transliteration problem for a low-resource languages
                 pair. Evaluation and experiments involving French and
                 Vietnamese showed that with only a small bilingual
                 pronunciation dictionary available for training the
                 transliteration models, promising results were obtained
                 with a large increase in BLEU scores and a reduction in
                 Translation Error Rate (TER) and Phoneme Error Rate
                 (PER). Moreover, we compared our proposed neural
                 network--based transliteration approach with a
                 statistical one.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Na:2019:TBK,
  author =       "Seung-hoon Na and Jianri Li and Jong-hoon Shin and
                 Kangil Kim",
  title =        "Transition-Based {Korean} Dependency Parsing Using
                 Hybrid Word Representations of Syllables and Morphemes
                 with {LSTMs}",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "14:1--14:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3241745",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3241745",
  abstract =     "Recently, neural approaches for transition-based
                 dependency parsing have become one of the
                 state-of-the-art methods for performing dependency
                 parsing tasks in many languages. In neural
                 transition-based parsing, a
                 parser state representation is first computed from the
                 configuration of a stack and a buffer, which is then
                 fed into a feed-forward neural network model that
                 predicts the next transition action. Given that words
                 are basic elements of a stack and buffer, a parser
                 state representation is considerably affected by how a
                 word representation is defined. In particular, word
                 representation issues become more critical in
                 morphologically rich languages such as Korean, as the
                 set of potential words is not bound but introduce the
                 second-order vocabulary complexity, called the phrase
                 vocabulary complexity due to the agglutinative
                 characteristics of the language. In this article, we
                 propose a hybrid word representation that combines two
                 compositional word representations, each of which is
                 derived from representations of syllables and
                 morphemes, respectively. Our underlying assumption for
                 this hybrid word representation is that, because both
                 syllables and morphemes are two common ways of
                 decomposing Korean words, it is expected that their
                 effects in inducing word representation are
                 complementary to one another. Experimental results
                 carried on Sejong and SPMRL 2014 datasets show that our
                 proposed hybrid word representation leads to the
                 state-of-the-art performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Akhtar:2019:IWE,
  author =       "Md Shad Akhtar and Palaash Sawant and Sukanta Sen and
                 Asif Ekbal and Pushpak Bhattacharyya",
  title =        "Improving Word Embedding Coverage in Less-Resourced
                 Languages Through Multi-Linguality and
                 Cross-Linguality: a Case Study with Aspect-Based
                 Sentiment Analysis",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3273931",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3273931",
  abstract =     "In the era of deep learning-based systems, efficient
                 input representation is one of the primary requisites
                 in solving various problems related to Natural Language
                 Processing (NLP), data mining, text mining, and the
                 like. Absence of adequate representation for an input
                 introduces the problem of data sparsity, and it poses a
                 great challenge to solve the underlying problem. The
                 problem is more intensified with resource-poor
                 languages due to the absence of a sufficiently large
                 corpus required to train a word embedding model. In
                 this work, we propose an effective method to improve
                 the word embedding coverage in less-resourced languages
                 by leveraging bilingual word embeddings learned from
                 different corpora. We train and evaluate deep Long
                 Short Term Memory (LSTM)-based architecture and show
                 the effectiveness of the proposed approach for two
                 aspect-level sentiment analysis tasks (i.e., aspect
                 term extraction and sentiment classification). The
                 neural network architecture is further assisted by
                 hand-crafted features for prediction. We apply the
                 proposed model in two experimental setups:
                 multi-lingual and cross-lingual. Experimental results
                 show the effectiveness of the proposed approach against
                 the state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Nakamura:2019:WBR,
  author =       "Tatsuya Nakamura and Masumi Shirakawa and Takahiro
                 Hara and Shojiro Nishio",
  title =        "{Wikipedia}-Based Relatedness Measurements for
                 Multilingual Short Text Clustering",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3276473",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3276473",
  abstract =     "Throughout the world, people can post information
                 about their local area in their own languages using
                 social networking services. Multilingual short text
                 clustering is an important task to organize such
                 information, and it can be applied to various
                 applications, such as event detection and
                 summarization. However, measuring the relatedness
                 between short texts written in various languages is a
                 challenging problem. In addition to handling multiple
                 languages, the semantic gaps among all languages must
                 be considered. In this article, we propose two
                 Wikipedia-based semantic relatedness measurement
                 methods for multilingual short text clustering. The
                 proposed methods solve the semantic gap problem by
                 incorporating the inter-language links of Wikipedia
                 into Extended Naive Bayes (ENB), a probabilistic method
                 that can be applied to measure semantic relatedness
                 among monolingual short texts. The proposed methods
                 represent a multilingual short text as a vector of the
                 English version of Wikipedia articles (entities). By
                 transferring texts to a unified vector space, the
                 relatedness between texts in different languages with
                 similar meanings can be increased. We also propose an
                 approach that can improve clustering performance and
                 reduce the processing time by eliminating
                 language-specific entities in the unified vector space.
                 Experimental results on multilingual Twitter message
                 clustering revealed that the proposed methods
                 outperformed cross-lingual explicit semantic analysis,
                 a previously proposed method to measure relatedness
                 between texts in different languages. Moreover, the
                 proposed methods were comparable to ENB applied to
                 texts translated into English using a proprietary
                 translation service. The proposed methods enabled
                 relatedness measurements for multilingual short text
                 clustering without requiring machine translation
                 processes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ding:2019:NFF,
  author =       "Chenchen Ding and Masao Utiyama and Eiichiro Sumita",
  title =        "{NOVA}: a Feasible and Flexible Annotation System for
                 Joint Tokenization and Part-of-Speech Tagging",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3276773",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3276773",
  abstract =     "A feasible and flexible annotation system is designed
                 for joint tokenization and part-of-speech (POS) tagging
                 to annotate those languages without natural definitions
                 of words. This design was motivated by the fact that
                 word separators are not used in many highly analytic
                 East and Southeast Asian languages. Although several of
                 the languages are well-studied, e.g., Chinese and
                 Japanese, many are understudied with low resources,
                 e.g., Burmese (Myanmar) and Khmer. In the first part of
                 the article, the proposed annotation system, named
                 nova, is introduced. nova contains only four basic tags
                 (n, v, a, and o); these tags can be further modified
                 and combined to adapt complex linguistic phenomena in
                 tokenization and POS tagging. In the second part of the
                 article, the feasibility and flexibility of nova is
                 illustrated from the annotation practice on Burmese and
                 Khmer. The relation between nova and two universal POS
                 tagsets is discussed in the final part of the
                 article.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ahmadi:2019:RBK,
  author =       "Sina Ahmadi",
  title =        "A Rule-Based {Kurdish} Text Transliteration System",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3278623",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3278623",
  abstract =     "In this article, we present a rule-based approach for
                 transliterating two of the most used orthographies in
                 Sorani Kurdish. Our work consists of detecting a
                 character in a word by removing the possible
                 ambiguities and mapping it into the target orthography.
                 We describe different challenges in Kurdish text mining
                 and propose novel ideas concerning the transliteration
                 task for Sorani Kurdish. Our transliteration system,
                 named Wergor, achieves 82.79\% overall precision and
                 more than 99\% in detecting the double-usage
                 characters. We also present a manually transliterated
                 corpus for Kurdish.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kamila:2019:THL,
  author =       "Sabyasachi Kamila and Mohammad Hasanuzzaman and Asif
                 Ekbal and Pushpak Bhattacharyya",
  title =        "{Tempo-HindiWordNet}: a Lexical Knowledge-base for
                 Temporal Information Processing",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3277504",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3277504",
  abstract =     "Temporality has significantly contributed to various
                 Natural Language Processing and Information Retrieval
                 applications. In this article, we first create a
                 lexical knowledge-base in Hindi by identifying the
                 temporal orientation of word senses based on their
                 definition and then use this resource to detect
                 underlying temporal orientation of the sentences. To
                 create the resource, we propose a semi-supervised
                 learning framework, where each synset of the Hindi
                 WordNet is classified into one of the five categories,
                 namely, past, present, future, neutral, and atemporal.
                 The algorithm initiates learning with a set of seed
                 synsets and then iterates following different expansion
                 strategies, viz. probabilistic expansion based on
                 classifier's confidence and semantic distance based
                 measures. We manifest the usefulness of the resource
                 that we build on an external task, viz. sentence-level
                 temporal classification. The underlying idea is that a
                 temporal knowledge-base can help in classifying the
                 sentences according to their inherent temporal
                 properties. Experiments on two different domains, viz.
                 general and Twitter, show interesting results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Alnawas:2019:SAI,
  author =       "Anwar Alnawas and Nursal Arici",
  title =        "Sentiment Analysis of {Iraqi Arabic} Dialect on
                 {Facebook} Based on Distributed Representations of
                 Documents",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "20:1--20:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3278605",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3278605",
  abstract =     "Nowadays, social media is used by many people to
                 express their opinions about a variety of topics.
                 Opinion Mining or Sentiment Analysis techniques extract
                 opinions from user generated contents. Over the years,
                 a multitude of Sentiment Analysis studies has been done
                 about the English language with deficiencies of
                 research in all other languages. Unfortunately, Arabic
                 is one of the languages that seems to lack substantial
                 research, despite the rapid growth of its use on social
                 media outlets. Furthermore, specific Arabic dialects
                 should be studied, not just Modern Standard Arabic. In
                 this paper, we experiment sentiments analysis of Iraqi
                 Arabic dialect using word embedding. First, we made a
                 large corpus from previous works to learn word
                 representations. Second, we generated word embedding
                 model by training corpus using Doc2Vec representations
                 based on Paragraph and Distributed Memory Model of
                 Paragraph Vectors (DM-PV) architecture. Lastly, the
                 represented feature used for training four binary
                 classifiers (Logistic Regression, Decision Tree,
                 Support Vector Machine and Naive Bayes) to detect
                 sentiment. We also experimented different values of
                 parameters (window size, dimension and negative
                 samples). In the light of the experiments, it can be
                 concluded that our approach achieves a better
                 performance for Logistic Regression and Support Vector
                 Machine than the other classifiers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Singh:2019:OHG,
  author =       "Sukhdeep Singh and Anuj Sharma",
  title =        "Online Handwritten {Gurmukhi} Words Recognition: an
                 Inclusive Study",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "21:1--21:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3282441",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3282441",
  abstract =     "Identification of offline and online handwritten words
                 is a challenging and complex task. In comparison to
                 Latin and Oriental scripts, the research and study of
                 handwriting recognition at word level in Indic scripts
                 is at its initial phases. The two main methods of
                 handwriting recognition are global and analytical. The
                 present work introduces a novel analytical approach for
                 online handwritten Gurmukhi word recognition based on a
                 minimal set of words and recognizes an input Gurmukhi
                 word as a sequence of characters. We employed a
                 sequential step-by-step approach to recognize online
                 handwritten Gurmukhi words. Considering the massive
                 variability in online Gurmukhi handwriting, the present
                 work employs the completely linked non-homogeneous
                 hidden Markov model. In the present study, we
                 considered the dependent, major-dependent, and
                 super-dependent nature of strokes to form Gurmukhi
                 characters in words. On test sets of online handwritten
                 Gurmukhi datasets, the word-level accuracy rates are
                 85.98\%, 84.80\%, 82.40\%, and 82.20\% in four
                 different modes. Besides the online Gurmukhi word
                 recognition, the present work also provides Gurmukhi
                 handwriting analysis study for varying writing styles
                 and proposes novel techniques for zone detection and
                 rearrangement of strokes. Our proposed algorithms have
                 been successfully employed to online handwritten
                 Gurmukhi word recognition in dependent and independent
                 modes of handwriting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yucesoy:2019:COW,
  author =       "Veysel Y{\"u}cesoy and Aykut Ko{\c{c}}",
  title =        "Co-occurrence Weight Selection in Generation of Word
                 Embeddings for Low Resource Languages",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "22:1--22:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3282443",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3282443",
  abstract =     "This study aims to increase the performance of word
                 embeddings by proposing a new weighting scheme for
                 co-occurrence counting. The idea behind this new family
                 of weights is to overcome the disadvantage of distant
                 appearing word pairs, which are indeed semantically
                 close, while representing them in the co-occurrence
                 counting. For high-resource languages, this
                 disadvantage might not be effective due to the high
                 frequency of co-occurrence. However, when there are not
                 enough available resources, such pairs suffer from
                 being distant. To favour such pairs, a weighting scheme
                 based on a polynomial fitting procedure is proposed to
                 shift the weights up for distant words while the
                 weights of nearby words are left almost unchanged. The
                 parameter optimization for new weights and the effects
                 of the weighting scheme are analysed for the English,
                 Italian, and Turkish languages. A small portion of
                 English resources and a quarter of Italian resources
                 are utilized for demonstration purposes, as if these
                 languages are low-resource languages. Performance
                 increase is observed in analogy tests when the proposed
                 weighting scheme is applied to relatively small corpora
                 (i.e., mimicking low-resource languages) of both
                 English and Italian. To show the effectiveness of the
                 proposed scheme in small corpora, it is also shown for
                 a large English corpus that the performance of the
                 proposed weighting scheme cannot outperform the
                 original weights. Since Turkish is relatively a
                 low-resource language, it is demonstrated that the
                 proposed weighting scheme can increase the performance
                 of both analogy and similarity tests when all Turkish
                 Wikipedia pages are utilized as a corpus. The positive
                 effect of the proposed scheme has also been
                 demonstrated in a standard sentiment analysis task for
                 the Turkish language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bounhas:2019:UCA,
  author =       "Ibrahim Bounhas",
  title =        "On the Usage of a Classical {Arabic} Corpus as a
                 Language Resource: Related Research and Key
                 Challenges",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3277591",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3277591",
  abstract =     "This article presents a literature review of
                 computer-science-related research applied on hadith, a
                 kind of Arabic narration which appeared in the 7th
                 century. We study and compare existent works in several
                 fields of Natural Language Processing (NLP),
                 Information Retrieval (IR), and Knowledge Extraction
                 (KE). Thus, we elicit their main drawbacks and
                 identify some perspectives, which may be considered by
                 the research community. We also study the
                 characteristics of these types of documents, by
                 enumerating the advantages/limits of using hadith as a
                 language resource. Moreover, our study shows that
                 previous studies used different collections of hadiths,
                 thus making it hard to compare their results
                 objectively. Besides, many preprocessing steps are
                 recurrent through these applications, thus wasting a
                 lot of time. Consequently, the key issues for building
                 generic language resources from hadiths are discussed,
                 taking into account the relevance of related literature
                 and the wide community of researchers that are
                 interested in these narrations. The ultimate goal is to
                 structure hadith books for multiple usages, thus
                 building common collections which may be exploited in
                 future applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jung:2019:MPN,
  author =       "Sangkeun Jung and Cheon-Eum Park and Changki Lee",
  title =        "Multitask Pointer Network for {Korean} Dependency
                 Parsing",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3282442",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3282442",
  abstract =     "Dependency parsing is a fundamental problem in natural
                 language processing. We introduce a novel
                 dependency-parsing framework called
                 head-pointing--based dependency parsing. In this
                 framework, we cast the Korean dependency parsing
                 problem as a statistical head-pointing and arc-labeling
                 problem. To address this problem, a novel neural
                 network called the multitask pointer network is devised
                 for a neural sequential head-pointing and type-labeling
                 architecture. Our approach does not require any
                 handcrafted features or language-specific rules to
                 parse dependency. Furthermore, it achieves
                 state-of-the-art performance for Korean dependency
                 parsing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bolucu:2019:UJP,
  author =       "Necva B{\"o}l{\"u}c{\"u} and Burcu Can",
  title =        "Unsupervised Joint {PoS} Tagging and Stemming for
                 Agglutinative Languages",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3292398",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3292398",
  abstract =     "The number of possible word forms is theoretically
                 infinite in agglutinative languages. This brings up the
                 out-of-vocabulary (OOV) issue for part-of-speech (PoS)
                 tagging in agglutinative languages. Since inflectional
                 morphology does not change the PoS tag of a word, we
                 propose to learn stems along with PoS tags
                 simultaneously. Therefore, we aim to overcome the
                 sparsity problem by reducing word forms into their
                 stems. We adopt a Bayesian model that is fully
                 unsupervised. We build a Hidden Markov Model for PoS
                 tagging where the stems are emitted through hidden
                 states. Several versions of the model are introduced in
                 order to observe the effects of different dependencies
                 throughout the corpus, such as the dependency between
                 stems and PoS tags or between PoS tags and affixes.
                 Additionally, we use neural word embeddings to estimate
                 the semantic similarity between the word form and stem.
                 We use the semantic similarity as prior information to
                 discover the actual stem of a word since inflection
                 does not change the meaning of a word. We compare our
                 models with other unsupervised stemming and PoS tagging
                 models on Turkish, Hungarian, Finnish, Basque, and
                 English. The results show that a joint model for PoS
                 tagging and stemming improves on an independent PoS
                 tagger and stemmer in agglutinative languages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kang:2019:SDR,
  author =       "Xiaomian Kang and Chengqing Zong and Nianwen Xue",
  title =        "A Survey of Discourse Representations for {Chinese}
                 Discourse Annotation",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3293442",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3293442",
  abstract =     "A key element in computational discourse analysis is
                 the design of a formal representation for the discourse
                 structure of a text. With machine learning being the
                 dominant method, it is important to identify a
                 discourse representation that can be used to perform
                 large-scale annotation. This survey provides a
                 systematic analysis of existing discourse
                 representation theories to evaluate whether they are
                 suitable for annotation of Chinese text. Specifically,
                 the two properties, expressiveness and practicality,
                 are introduced to compare the representations of
                 theories based on rhetorical relations and the
                 representations of theories based on entity relations.
                 The comparison systematically reveals linguistic and
                 computational characteristics of the theories. After
                 that, we conclude that none of the existing theories
                 are quite suitable for scalable Chinese discourse
                 annotation because they are not both expressive and
                 practical. Therefore, a new discourse representation
                 needs to be proposed, which should balance the
                 expressiveness and practicality, and cover rhetorical
                 relations and entity relations. Inspired by the
                 conclusions, this survey discusses some preliminary
                 proposals on how to represent the discourse structure
                 that are worth pursuing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Badaro:2019:SOM,
  author =       "Gilbert Badaro and Ramy Baly and Hazem Hajj and Wassim
                 El-Hajj and Khaled Bashir Shaban and Nizar Habash and
                 Ahmad Al-Sallab and Ali Hamdi",
  title =        "A Survey of Opinion Mining in {Arabic}: a
                 Comprehensive System Perspective Covering Challenges
                 and Advances in Tools, Resources, Models, Applications,
                 and Visualizations",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3295662",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3295662",
  abstract =     "Opinion-mining or sentiment analysis continues to gain
                 interest in industry and academics. While there has
                 been significant progress in developing models for
                 sentiment analysis, the field remains an active area of
                 research for many languages across the world, and in
                 particular for the Arabic language, which is the fifth
                 most-spoken language and has become the fourth
                 most-used language on the Internet. With the flurry of
                 research activity in Arabic opinion mining, several
                 researchers have provided surveys to capture advances
                 in the field. While these surveys capture a wealth of
                 important progress in the field, the fast pace of
                 advances in machine learning and natural language
                 processing (NLP) necessitates a continuous need for a
                 more up-to-date literature survey. The aim of this
                 article is to provide a comprehensive literature survey
                 for state-of-the-art advances in Arabic opinion mining.
                 The survey goes beyond surveying previous works that
                 were primarily focused on classification models.
                 Instead, this article provides a comprehensive system
                 perspective by covering advances in different aspects
                 of an opinion-mining system, including advances in NLP
                 software tools, lexical sentiment and corpora
                 resources, classification models, and applications of
                 opinion mining. It also presents future directions for
                 opinion mining in Arabic. The survey also covers latest
                 advances in the field, including deep learning advances
                 in Arabic Opinion Mining. The article provides
                 state-of-the-art information to help new or established
                 researchers in the field as well as industry developers
                 who aim to deploy an operational complete
                 opinion-mining system. Key insights are captured at the
                 end of each section for particular aspects of the
                 opinion-mining system giving the reader a choice of
                 focusing on particular aspects of interest.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Masmoudi:2019:ADR,
  author =       "Abir Masmoudi and Salima Mdhaffar and Rahma Sellami
                 and Lamia Hadrich Belguith",
  title =        "Automatic Diacritics Restoration for {Tunisian}
                 Dialect",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3297278",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3297278",
  abstract =     "Modern Standard Arabic, as well as Arabic dialect
                 languages, are usually written without diacritics. The
                 absence of these marks constitutes a real problem in the
                 automatic processing of these data by NLP tools.
                 Indeed, writing Arabic without diacritics introduces
                 several types of ambiguity. First, a word without
                 diacritics could have many possible meanings depending
                 on their diacritization. Second, undiacritized surface
                 forms of an Arabic word might have as many as 200
                 readings depending on the complexity of its morphology
                 [12]. In fact, the agglutination property of Arabic
                 might produce a problem that can only be resolved using
                 diacritics. Third, without diacritics a word could have
                 many possible parts of speech (POS) instead of one.
                 This is the case with the words that have the same
                 spelling and POS tag but a different lexical sense, or
                 words that have the same spelling but different POS
                 tags and lexical senses [8]. Finally, there is
                 ambiguity at the grammatical level (syntactic
                 ambiguity). In this article, we propose the first work
                 that investigates the automatic diacritization of
                 Tunisian Dialect texts. We first describe our
                 annotation guidelines and procedure. Then, we propose
                 two major models, namely a statistical machine
                 translation (SMT) and a discriminative model as a
                 sequence classification task based on Conditional
                 Random Fields (CRF). In the second approach, we
                 integrate POS features to influence the generation of
                 diacritics. Diacritics restoration was performed at
                 both the word and the character levels. The results
                 showed high scores of automatic diacritization based on
                 the CRF system (Word Error Rate (WER) 21.44\% for CRF
                 and WER 34.6\% for SMT).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Rudra:2019:IAD,
  author =       "Koustav Rudra and Ashish Sharma and Kalika Bali and
                 Monojit Choudhury and Niloy Ganguly",
  title =        "Identifying and Analyzing Different Aspects of
                 {English--Hindi} Code-Switching in {Twitter}",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314935",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314935",
  abstract =     "Code-switching or the juxtaposition of linguistic
                 units from two or more languages in a single utterance,
                 has, in recent times, become very common in text,
                 thanks to social media and other computer mediated
                 forms of communication. In this exploratory study of
                 English--Hindi code-switching on Twitter, we
                 automatically create a large corpus of code-switched
                 tweets and devise techniques to identify the
                 relationship between successive components in a
                 code-switched tweet. More specifically, we identify
                 pragmatic functions such as narrative-evaluative,
                 negative reinforcement, translation or semantically
                 equivalent statements, and so on characterizing the
                 relation between successive components. We analyze the
                 difference/similarity between switching patterns in
                 code-switched and monolingual multi-component tweets.
                 We observe strong dominance of narrative-evaluative
                 (non-opinion to opinion or vice versa) switching in
                 case of both code-switched and monolingual
                 multi-component tweets in around 40\% of cases.
                 Polarity switching appears to be a prevalent switching
                 phenomenon (10\%) specifically in code-switched tweets
                 (three to four times higher than monolingual
                 multi-component tweets) where preference of expressing
                 negative sentiment in Hindi is approximately twice
                 compared to English. Positive reinforcement appears to
                 be an important pragmatic function for English
                 multi-component tweets, whereas negative reinforcement
                 plays a key role for Devanagari multi-component tweets.
                 Our results also indicate that the extent and nature of
                 code-switching also strongly depend on the topic
                 (sports, politics, etc.) of discussion.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Verma:2019:CAH,
  author =       "Pradeepika Verma and Sukomal Pal and Hari Om",
  title =        "A Comparative Analysis on {Hindi} and {English}
                 Extractive Text Summarization",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3308754",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3308754",
  abstract =     "Text summarization is the process of transfiguring a
                 large documental information into a clear and concise
                 form. In this article, we present a detailed
                 comparative study of various extractive methods for
                 automatic text summarization on Hindi and English text
                 datasets of news articles. We consider 13 different
                 summarization techniques, namely, TextRank, LexRank,
                 Luhn, LSA, Edmundson, ChunkRank, TGraph, UniRank,
                 NN-ED, NN-SE, FE-SE, SummaRuNNer, and MMR-SE, and we
                 evaluate their performance using various performance
                 metrics, such as precision, recall, F$_1$, cohesion,
                 non-redundancy, readability, and significance. A
                 thorough analysis is done in eight different parts that
                 exhibits the strengths and limitations of these
                 methods, effect of performance over the summary length,
                 impact of language of a document, and other factors as
                 well. A standard summary evaluation tool (ROUGE) and
                 extensive programmatic evaluation using Python 3.5 in
                 Anaconda environment are used to evaluate their
                 outcome.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wei:2019:ROD,
  author =       "Bingzhen Wei and Xuancheng Ren and Yi Zhang and
                 Xiaoyan Cai and Qi Su and Xu Sun",
  title =        "Regularizing Output Distribution of Abstractive
                 {Chinese} Social Media Text Summarization for Improved
                 Semantic Consistency",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "31:1--31:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314934",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314934",
  abstract =     "Abstractive text summarization is a highly difficult
                 problem, and the sequence-to-sequence model has shown
                 success in improving the performance on the task.
                 However, the generated summaries are often inconsistent
                 with the source content in semantics. In such cases,
                 when generating summaries, the model selects
                 semantically unrelated words with respect to the source
                 content as the most probable output. The problem can be
                 attributed to heuristically constructed training data,
                 where summaries can be unrelated to the source content,
                 thus containing semantically unrelated words and
                 spurious word correspondence. In this article, we
                 propose a regularization approach for the
                 sequence-to-sequence model and make use of what the
                 model has learned to regularize the learning objective
                 to alleviate the effect of the problem. In addition, we
                 propose a practical human evaluation method to address
                 the problem that the existing automatic evaluation
                 method does not evaluate the semantic consistency with
                 the source content properly. Experimental results
                 demonstrate the effectiveness of the proposed approach,
                 which outperforms almost all the existing models.
                 Especially, the proposed approach improves the semantic
                 consistency by 4\% in terms of human evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Trieu:2019:LAR,
  author =       "Hai-Long Trieu and Duc-Vu Tran and Ashwin Ittoo and
                 Le-Minh Nguyen",
  title =        "Leveraging Additional Resources for Improving
                 Statistical Machine Translation on {Asian} Low-Resource
                 Languages",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "32:1--32:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314936",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314936",
  abstract =     "Phrase-based machine translation (MT) systems require
                 large bilingual corpora for training. Nevertheless,
                 such large bilingual corpora are unavailable for most
                 language pairs in the world, causing a bottleneck for
                 the development of MT. For the Asian language
                 pairs---Japanese, Indonesian, Malay paired with
                 Vietnamese---they are also not excluded from the case, in
                 which there are no large bilingual corpora on these
                 low-resource language pairs. Furthermore, although the
                 languages are widely used in the world, there is no
                 prior work on MT, which causes an issue for the
                 development of MT on these languages. In this article,
                 we conducted an empirical study of leveraging
                 additional resources to improve MT for the Asian
                 low-resource language pairs: translation from Japanese,
                 Indonesian, and Malay to Vietnamese. We propose an
                 innovative approach that lies in two strategies of
                 building bilingual corpora from comparable data and
                 phrase pivot translation on existing bilingual corpora
                 of the languages paired with English. Bilingual corpora
                 were built from Wikipedia bilingual titles to enhance
                 bilingual data for the low-resource languages.
                 Additionally, we introduced a combined model of the
                 additional resources to create an effective solution to
                 improve MT on the Asian low-resource languages.
                 Experimental results show the effectiveness of our
                 systems with the improvement of +2 to +7 BLEU points.
                 This work contributes to the development of MT on
                 low-resource languages, especially opening a promising
                 direction for the progress of MT on the Asian language
                 pairs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Dehghan:2019:CDS,
  author =       "Mohammad Hossein Dehghan and Heshaam Faili",
  title =        "Converting Dependency Structure Into {Persian} Phrase
                 Structure",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "3",
  pages =        "33:1--33:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314937",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:32 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314937",
  abstract =     "Treebank is one of the important and useful resources
                 in natural language processing represented in two
                 different annotated schemas: phrase and dependency
                 structures. There are many works that convert a phrase
                 structure into a dependency structure and vice versa.
                 Most of them are rule-based approaches that exploit the
                 handcrafted head percolation table and argument table in
                 predefined deterministic ways. In this article, we propose a
                 method to convert a dependency structure into a phrase
                 structure by enriching a trainable model of former
                 hybrid strategy approach. By adding a classifier to the
                 algorithm and using postprocessing modification, the
                 quality of conversion is increased. We evaluate our
                 method in two different languages, English and Persian,
                 and then analyze the errors. The results of our
                 experiments show a 46.01\% reduction of error rate in
                 English and 76.50\% for Persian compared to our
                 baseline. We build a new phrase structure treebank by
                 converting 10,000 sentences of Persian dependency
                 treebank into corresponding phrase structures and
                 correcting them manually.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Awais:2019:RDI,
  author =       "Muhammad Awais and Muhammad Shoaib",
  title =        "Role of Discourse Information in {Urdu} Sentiment
                 Classification: a Rule-based Method and
                 Machine-learning Technique",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3300050",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3300050",
  abstract =     "In computational linguistics, sentiment analysis
                 refers to the classification of opinions in a positive
                 class or a negative class. There exist a lot of
                 different methods for sentiment analysis of the English
                 language, but the literature lacks the availability of
                 methods and techniques for Urdu, which is the largely
                 spoken language in the South Asian sub-continent and
                 the national language of Pakistan. The currently
                 available techniques, such as adjective count method
                 known as Bag of Words (BoW), is not sufficient for
                 classification of complex sentiment written in the Urdu
                 language. Also, the performance of available
                 machine-learning techniques (with legacy features), for
                 classification of Urdu sentiments, are not comparable
                 with the achieved accuracy of other languages. In the
                 case of the English language, the discourse information
                 (sub-sentence-level information) boosts the performance
                 of both the BoW method and machine-learning techniques,
                 but there are very few works available that have tested
                 the context-level information for the sentiment
                 analysis of the Urdu language. This research aims to
                 extract the discourse information from the Urdu
                 sentiments and utilise the discourse information to
                 improve the performance and reduce the error rate of
                 existing techniques for Urdu Sentiment classification.
                 The proposed solution extracts the discourse
                 information, suggests a new set of features for
                 machine-learning techniques, and introduces a set of
                 rules to extend the capabilities of the BoW model. The
                 results show that the task has been enhanced
                 significantly and the performance metrics such as
                 recall, precision, and accuracy are increased by
                 31.25\%, 8.46\%, and 21.6\%, respectively. In future,
                 the proposed technique can be extended to sentiments
                 with more than two sub-opinions, such as for blogs,
                 reviews, and TV talk shows.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Nongmeikapam:2019:HMM,
  author =       "Kishorjit Nongmeikapam and Kanan Wahengbam and Oinam
                 Nickson Meetei and Themrichon Tuithung",
  title =        "Handwritten {Manipuri Meetei--Mayek} Classification
                 Using Convolutional Neural Network",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309497",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309497",
  abstract =     "A new technique for classifying all 56 different
                 characters of the Manipuri Meetei-Mayek (MMM) is
                 proposed herein. The characters are grouped under five
                 categories, which are Eeyek Eepee (original alphabets),
                 Lom Eeyek (additional letters), Cheising Eeyek
                 (digits), Lonsum Eeyek (letters with short endings),
                 and Cheitap Eeyek (vowel signs). Two related works
                 proposed by previous researchers are studied for
                 understanding the benefits claimed by the proposed deep
                 learning approach in handwritten Manipuri Meetei-Mayek.
                 (1) Histogram of Oriented Gradients (HOG) with SVM
                 classifier is implemented for thoroughly understanding how
                 HOG features can influence accuracy. (2) The handwritten
                 samples are trained using simple Convolutional Neural
                 Network (CNN) and compared with the proposed CNN-based
                 architecture. Significant progress has been made in the
                 field of Optical Character Recognition (OCR) for
                 well-known Indian languages as well as globally popular
                 languages. Our work is novel in the sense that there is
                 no record of work available to date that is able to
                 classify all 56 classes of the MMM. It will also serve
                 as a pre-cursor for developing end-to-end OCR software
                 for translating old manuscripts, newspaper archives,
                 books, and so on.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Gao:2019:SBC,
  author =       "Shengxiang Gao and Jihao Huang and Mingya Xue and
                 Zhengtao Yu and Zhuo Wang and Yang Zhang",
  title =        "Syntax-Based {Chinese--Vietnamese} Tree-to-Tree
                 Statistical Machine Translation with Bilingual
                 Features",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314938",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314938",
  abstract =     "Because of the scarcity of bilingual corpora, current
                 Chinese--Vietnamese machine translation is far from
                 satisfactory. Considering the differences between
                 Chinese and Vietnamese, we investigate whether
                 linguistic differences can be used to supervise machine
                 translation and propose a method of syntax-based
                 Chinese--Vietnamese tree-to-tree statistical machine
                 translation with bilingual features. Analyzing the
                 syntax differences between Chinese and Vietnamese, we
                 define some linguistic difference-based rules, such as
                 attributive position, time adverbial position, and
                 locative adverbial position, and create rewards for
                 similar rules. These rewards are integrated into the
                 extraction of tree-to-tree translation rules, and we
                 optimize the pruning of the search space during the
                 decoding phase. The experiments on Chinese--Vietnamese
                 bilingual sentence translation show that the proposed
                 method performs better than several compared methods.
                 Further, the results show that syntactic difference
                 features, with search pruning, can improve the accuracy
                 of machine translation without degrading the
                 efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sun:2019:NSP,
  author =       "Ruiyong Sun and Yijia Zhao and Qi Zhang and Keyu Ding
                 and Shijin Wang and Cui Wei",
  title =        "A Neural Semantic Parser for Math Problems
                 Incorporating Multi-Sentence Information",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "37:1--37:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314939",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314939",
  abstract =     "In this article, we study the problem of parsing a
                 math problem into logical forms. It is an essential
                 pre-processing step for automatically solving math
                 problems. Most of the existing studies about semantic
                 parsing mainly focused on the single-sentence level.
                 However, for parsing math problems, we need to take the
                 information of multiple sentences into consideration.
                 To achieve the task, we formulate the task as a machine
                 translation problem and extend the sequence-to-sequence
                 model with a novel two-encoder architecture and a
                 word-level selective mechanism. For training and
                 evaluating the proposed method, we construct a
                 large-scale dataset. Experimental results show that the
                 proposed two-encoder architecture and word-level
                 selective mechanism could bring significant
                 improvement. The proposed method can achieve better
                 performance than the state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Maimaiti:2019:MRT,
  author =       "Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan
                 and Maosong Sun",
  title =        "Multi-Round Transfer Learning for Low-Resource {NMT}
                 Using Multiple High-Resource Languages",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "38:1--38:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314945",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314945",
  abstract =     "Neural machine translation (NMT) has made remarkable
                 progress in recent years, but the performance of NMT
                 suffers from a data sparsity problem since large-scale
                 parallel corpora are only readily available for
                 high-resource languages (HRLs). In recent days,
                 transfer learning (TL) has been used widely in
                 low-resource languages (LRLs) machine translation,
                 while TL is becoming one of the vital directions for
                 addressing the data sparsity problem in low-resource
                 NMT. As a solution, a transfer learning method in NMT
                 is generally obtained via initializing the low-resource
                 model (child) with the high-resource model (parent).
                 However, leveraging the original TL to low-resource
                 models is neither able to make full use of highly
                 related multiple HRLs nor to receive different
                 parameters from the same parents. In order to exploit
                 multiple HRLs effectively, we present a
                 language-independent and straightforward multi-round
                 transfer learning (MRTL) approach to low-resource NMT.
                 Besides, with the intention of reducing the differences
                 between high-resource and low-resource languages at the
                 character level, we introduce a unified transliteration
                 method for various language families, which are both
                 semantically and syntactically highly analogous with
                 each other. Experiments on low-resource datasets show
                 that our approaches are effective, significantly
                 outperform the state-of-the-art methods, and yield
                 improvements of up to 5.63 BLEU points.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ihasz:2019:SFS,
  author =       "Peter Lajos Ihasz and Mate Kovacs and Ian Piumarta and
                 Victor V. Kryssanov",
  title =        "A Supplementary Feature Set for Sentiment Analysis in
                 {Japanese} Dialogues",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "39:1--39:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3310283",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3310283",
  abstract =     "Recently, real-time affect-awareness has been applied
                 in several commercial systems, such as dialogue systems
                 and computer games. Real-time recognition of affective
                 states, however, requires the application of costly
                 feature extraction methods and/or labor-intensive
                 annotation of large datasets, especially in the case of
                 Asian languages where large annotated datasets are
                 seldom available. To improve recognition accuracy, we
                 propose the use of cognitive context in the form of
                 ``emotion-sensitive'' intentions. Intentions are often
                 represented through dialogue acts and, as an
                 emotion-sensitive model of dialogue acts, a tagset of
                 interpersonal-relations-directing interpersonal acts
                 (the IA model) is proposed. The model's adequacy is
                 assessed using a sentiment classification task in
                 comparison with two well-known dialogue act models, the
                 SWBD-DAMSL and the DIT++. For the assessment, five
                 Japanese in-game dialogues were annotated with labels
                 of sentiments and the tags of all three dialogue act
                 models which were used to enhance a baseline sentiment
                 classifier system. The adequacy of the IA tagset is
                 demonstrated by a 9\% improvement to the baseline
                 sentiment classifier's recognition accuracy,
                 outperforming the other two models by more than 5\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Saeed:2019:SAC,
  author =       "Ali Saeed and Rao Muhammad Adeel Nawab and Mark
                 Stevenson and Paul Rayson",
  title =        "A Sense Annotated Corpus for All-Words {Urdu} Word
                 Sense Disambiguation",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "40:1--40:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314940",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314940",
  abstract =     "Word Sense Disambiguation (WSD) aims to automatically
                 predict the correct sense of a word used in a given
                 context. All human languages exhibit word sense
                 ambiguity, and resolving this ambiguity can be
                 difficult. Standard benchmark resources are required to
                 develop, compare, and evaluate WSD techniques. These
                 are available for many languages, but not for Urdu,
                 despite this being a language with more than 300
                 million speakers and large volumes of text available
                 digitally. To fill this gap, this study proposes a
                 novel benchmark corpus for the Urdu All-Words WSD task.
                 The corpus contains 5,042 words of Urdu running text in
                 which all ambiguous words (856 instances) are manually
                 tagged with senses from the Urdu Lughat dictionary. A
                 range of baseline WSD models based on $n$-gram are
                 applied to the corpus, and the best performance
                 (accuracy of 57.71\%) is achieved using word 4-gram.
                 The corpus is freely available to the research
                 community to encourage further WSD research in Urdu.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Dahou:2019:MCE,
  author =       "Abdelghani Dahou and Shengwu Xiong and Junwei Zhou and
                 Mohamed {Abd Elaziz}",
  title =        "Multi-Channel Embedding Convolutional Neural Network
                 Model for {Arabic} Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "41:1--41:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314941",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314941",
  abstract =     "With the advent of social network services, Arabs'
                 opinions on the web have attracted many researchers in
                 recent years toward detecting and classifying
                 sentiments in Arabic tweets and reviews. However, the
                 impact of word embeddings vectors (WEVs) initialization
                 and dataset balance on Arabic sentiment classification
                 using deep learning has not been thoroughly studied. In
                 this article, a multi-channel embedding convolutional
                 neural network (MCE-CNN) is proposed to improve Arabic
                 sentiment classification by learning sentiment features
                 from different text domains, word, and character
                 n-grams levels. MCE-CNN encodes a combination of
                 different pre-trained word embeddings into the
                 embedding block at each embedding channel and trains
                 these channels in parallel. Besides, a separate feature
                 extraction module implemented in a CNN block is used to
                 extract more relevant sentiment features. These
                 channels and blocks help to start training on
                 high-quality WEVs and fine-tuning them. The performance
                 of MCE-CNN is evaluated on several standard balanced
                 and imbalanced datasets to reflect real-world use
                 cases. Experimental results show that MCE-CNN provides
                 a high classification accuracy and benefits from the
                 second embedding channel on both standard Arabic and
                 dialectal Arabic text, which outperforms
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Onyenwe:2019:TEI,
  author          = {Ikechukwu E. Onyenwe and Mark Hepple and Uchechukwu
                     Chinedu and Ignatius Ezeani},
  title           = {Toward an Effective {Igbo} Part-of-Speech Tagger},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {42},
  pages           = {42:1--42:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3314942},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3314942},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Part-of-speech (POS) tagging is a well-established
                     technology for most Western European languages and a
                     few other world languages, but it has not been
                     evaluated on Igbo, an agglutinative African language.
                     This article presents POS tagging experiments conducted
                     using an Igbo corpus as a test bed for identifying the
                     POS taggers and the Machine Learning (ML) methods that
                     can achieve a good performance with the small dataset
                     available for the language. Experiments have been
                     conducted using different well-known POS taggers
                     developed for English or European languages, and
                     different training data styles and sizes. Igbo has a
                     number of language-specific characteristics that
                     present a challenge for effective POS tagging. One
                     interesting case is the wide use of verbs (and
                     nominalizations thereof) that have an inherent noun
                     complement, which form ``linked pairs'' in the POS
                     tagging scheme, but which may appear discontinuously.
                     Another issue is Igbo's highly productive agglutinative
                     morphology, which can produce many variant word forms
                     from a given root. This productivity is a key cause of
                     the out-of-vocabulary (OOV) words observed during Igbo
                     tagging. We report results of experiments on a
                     promising direction for improving tagging performance
                     on such morphologically-inflected OOV words.}
}

@article{Costa-Jussa:2019:CCN,
  author          = {Marta R. Costa-Juss{\`a} and No{\'e} Casas and Carlos
                     Escolano and Jos{\'e} A. R. Fonollosa},
  title           = {{Chinese--Catalan}: a Neural Machine Translation
                     Approach Based on Pivoting and Attention Mechanisms},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {43},
  pages           = {43:1--43:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3312575},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3312575},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {This article innovatively addresses machine
                     translation from Chinese to Catalan using neural pivot
                     strategies trained without any direct parallel data.
                     The Catalan language is very similar to Spanish from a
                     linguistic point of view, which motivates the use of
                     Spanish as pivot language. Regarding neural
                     architecture, we are using the latest state-of-the-art,
                     which is the Transformer model, only based on attention
                     mechanisms. Additionally, this work provides new
                     resources to the community, which consists of a
                     human-developed gold standard of 4,000 sentences
                     between Catalan and Chinese and all the others United
                     Nations official languages (Arabic, English, French,
                     Russian, and Spanish). Results show that the standard
                     pseudo-corpus or synthetic pivot approach performs
                     better than cascade.}
}

@article{Yu:2019:MTE,
  author          = {Hui Yu and Weizhi Xu and Shouxun Lin and Qun Liu},
  title           = {Machine Translation Evaluation Metric Based on
                     Dependency Parsing Model},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {44},
  pages           = {44:1--44:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3312573},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3312573},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Most of the syntax-based metrics obtain the similarity
                     by comparing the sub-structures extracted from the
                     trees of hypothesis and reference. These sub-structures
                     cannot represent all the information in the trees
                     because their lengths are limited. To sufficiently use
                     the reference syntax information, a new automatic
                     evaluation metric is proposed based on the dependency
                     parsing model. First, a dependency parsing model is
                     trained using the reference dependency tree for each
                     sentence. Then, the hypothesis is parsed by this
                     dependency parsing model and the corresponding
                     hypothesis dependency tree is generated. The quality of
                     hypothesis can be judged by the quality of the
                     hypothesis dependency tree. Unigram F-score is included
                     in the new metric so that lexicon similarity is
                     obtained. According to experimental results, the
                     proposed metric can perform better than METEOR and BLEU
                     on system level and get comparable results with METEOR
                     on sentence level. To further improve the performance,
                     we also propose a combined metric which gets the best
                     performance on the sentence level and on the system
                     level.}
}

@article{Liu:2019:EBC,
  author          = {Yang Liu and Shaonan Wang and Jiajun Zhang and
                     Chengqing Zong},
  title           = {Experience-based Causality Learning for Intelligent
                     Agents},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {45},
  pages           = {45:1--45:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3314943},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3314943},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Understanding causality in text is crucial for
                     intelligent agents. In this article, inspired by human
                     causality learning, we propose an experience-based
                     causality learning framework. Comparing to traditional
                     approaches, which attempt to handle the causality
                     problem relying on textual clues and linguistic
                     resources, we are the first to use experience
                     information for causality learning. Specifically, we
                     first construct various scenarios for intelligent
                     agents, thus, the agents can gain experience from
                     interaction in these scenarios. Then, human
                     participants build a number of training instances for
                     agents of causality learning based on these scenarios.
                     Each instance contains two sentences and a label. Each
                     sentence describes an event that an agent experienced
                     in a scenario, and the label indicates whether the
                     sentence (event) pair has a causal relation.
                     Accordingly, we propose a model that can infer the
                     causality in text using experience by accessing the
                     corresponding event information based on the input
                     sentence pair. Experiment results show that our method
                     can achieve impressive performance on the grounded
                     causality corpus and significantly outperform the
                     conventional approaches. Our work suggests that
                     experience is very important for intelligent agents to
                     understand causality.}
}

@Article{Yin:2019:PTE,
  author =       "Yongjing Yin and Jinsong Su and Huating Wen and Jiali
                 Zeng and Yang Liu and Yidong Chen",
  title =        "{POS} Tag-enhanced Coarse-to-fine Attention for Neural
                 Machine Translation",
  journal =      j-TALLIP,
  volume =       "18",
  number =       "4",
  pages =        "46:1--46:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321124",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed Oct 2 10:34:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3321124",
  abstract =     "Although neural machine translation (NMT) has certain
                 capability to implicitly learn semantic information of
                 sentences, we explore and show that Part-of-Speech
                 (POS) tags can be explicitly incorporated into the
                 attention mechanism of NMT effectively to yield further
                 improvements. In this article, we propose an NMT model
                 with tag-enhanced attention mechanism. In our model,
                 NMT and POS tagging are jointly modeled via multi-task
                 learning. Besides following common practice to enrich
                 encoder annotations by introducing predicted source POS
                 tags, we exploit predicted target POS tags to refine
                 attention model in a coarse-to-fine manner.
                 Specifically, we first implement a coarse attention
                 operation solely on source annotations and target
                 hidden state, where the produced context vector is
                 applied to update target hidden state used for target
                 POS tagging. Then, we perform a fine attention
                 operation that extends the coarse one by further
                 exploiting the predicted target POS tags. Finally, we
                 facilitate word prediction by simultaneously utilizing
                 the context vector from fine attention and the
                 predicted target POS tags. Experimental results and
                 further analyses on Chinese--English and
                 Japanese--English translation tasks demonstrate the
                 superiority of our proposed model over the conventional
                 NMT models. We release our code at
                 https://github.com/middlekisser/PEA-NMT.git.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Yang:2019:MEA,
  author          = {Jun Yang and Runqi Yang and Hengyang Lu and Chongjun
                     Wang and Junyuan Xie},
  title           = {Multi-Entity Aspect-Based Sentiment Analysis with
                     Context, Entity, Aspect Memory and Dependency
                     Information},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {47},
  pages           = {47:1--47:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3321125},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3321125},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Fine-grained sentiment analysis is a useful tool for
                     producers to understand consumers' needs as well as
                     complaints about products and related aspects from
                     online platforms. In this article, we define a novel
                     task named ``Multi-Entity Aspect-Based Sentiment
                     Analysis (ME-ABSA)''. It investigates the sentiment
                     towards entities and their related aspects. It makes
                     the well-studied aspect-based sentiment analysis a
                     special case of this type, where the number of entities
                     is limited to one. We contribute a new dataset for this
                     task, with multi-entity Chinese posts in it. We propose
                     to model context, entity, and aspect memory to address
                     the task and incorporate dependency information for
                     further improvement. Experiments show that our methods
                     perform significantly better than baseline methods on
                     datasets for both ME-ABSA task and ABSA task. The
                     in-depth analysis further validates the effectiveness
                     of our methods and shows that our methods are capable
                     of generalizing to new (entity, aspect) combinations
                     with little loss of accuracy. This observation
                     indicates that data annotation in real applications can
                     be largely simplified.}
}

@article{Kim:2019:MTS,
  author          = {Hyun Kim and Jong-Hyeok Lee and Seung-Hoon Na},
  title           = {Multi-task Stack Propagation for Neural Quality
                     Estimation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {18},
  number          = {4},
  articleno       = {48},
  pages           = {48:1--48:??},
  month           = aug,
  year            = {2019},
  coden           = {????},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  doi             = {https://doi.org/10.1145/3321127},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3321127},
  journal-url     = {https://dl.acm.org/loi/tallip},
  bibdate         = {Wed Oct 2 10:34:33 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Quality estimation is an important task in machine
                     translation that has attracted increased interest in
                     recent years. A key problem in translation-quality
                     estimation is the lack of a sufficient amount of the
                     quality annotated training data. To address this
                     shortcoming, the Predictor-Estimator was proposed
                     recently by introducing ``word prediction'' as an
                     additional pre-subtask that predicts a current target
                     word with consideration of surrounding source and
                     target contexts, resulting in a two-stage neural model
                     composed of a predictor and an estimator. However, the
                     original Predictor-Estimator is not trained on a
                     continuous stacking model but instead in a cascaded
                     manner that separately trains the predictor from the
                     estimator. In addition, the Predictor-Estimator is
                     trained based on single-task learning only, which uses
                     target-specific quality-estimation data without using
                     other training data that are available from other-level
                     quality-estimation tasks. In this article, we thus
                     propose a multi-task stack propagation, which
                     extensively applies stack propagation to fully train
                     the Predictor-Estimator on a continuous stacking
                     architecture and multi-task learning to enhance the
                     training data from related other-level
                     quality-estimation tasks. Experimental results on WMT17
                     quality-estimation datasets show that the
                     Predictor-Estimator trained with multi-task stack
                     propagation provides statistically significant
                     improvements over the baseline models. In particular,
                     under an ensemble setting, the proposed multi-task
                     stack propagation leads to state-of-the-art performance
                     at all the sentence/word/phrase levels for WMT17
                     quality estimation tasks.}
}

@Article{Wang:2020:GCL,
  author =       "Hongmin Wang and Jie Yang and Yue Zhang",
  title =        "From {Genesis} to {Creole} Language: Transfer Learning
                 for {Singlish} Universal Dependencies Parsing and {POS}
                 Tagging",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "1:1--1:29",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321128",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3321128",
  abstract =     "Singlish can be interesting to the computational
                 linguistics community both linguistically, as a major
                 low-resource creole based on English, and
                 computationally, for information extraction and
                 sentiment analysis of regional social media. In our
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kong:2020:CZP,
  author =       "Fang Kong and Min Zhang and Guodong Zhou",
  title =        "{Chinese} Zero Pronoun Resolution: a Chain-to-chain
                 Approach",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "2:1--2:21",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3321129",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3321129",
  abstract =     "Chinese zero pronoun (ZP) resolution plays a critical
                 role in discourse analysis. Different from traditional
                 mention-to-mention approaches, this article proposes a
                 chain-to-chain approach to improve the performance of
                 ZP resolution in three aspects. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yin:2020:CZP,
  author =       "Qingyu Yin and Weinan Zhang and Yu Zhang and Ting
                 Liu",
  title =        "{Chinese} Zero Pronoun Resolution: a Collaborative
                 Filtering-based Approach",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "3:1--3:20",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325884",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325884",
  abstract =     "Semantic information that has been proven to be
                 necessary to the resolution of common noun phrases is
                 typically ignored by most existing Chinese zero pronoun
                 resolvers. This is because that zero pronouns convey no
                 descriptive information, which makes it \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Das:2020:TCT,
  author =       "Ayan Das and Sudeshna Sarkar",
  title =        "Transform, Combine, and Transfer: Delexicalized
                 Transfer Parser for Low-resource Languages",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "4:1--4:30",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325886",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325886",
  abstract =     "Transfer parsing has been used for developing
                 dependency parsers for languages with no treebank by
                 using transfer from treebanks of other languages
                 (source languages). In delexicalized transfer, parsed
                 words are replaced by their part-of-speech tags.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ding:2020:TBM,
  author =       "Chenchen Ding and Hnin Thu Zar Aye and Win Pa Pa and
                 Khin Thandar Nwet and Khin Mar Soe and Masao Utiyama
                 and Eiichiro Sumita",
  title =        "Towards {Burmese} ({Myanmar}) Morphological Analysis:
                 Syllable-based Tokenization and Part-of-speech
                 Tagging",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "5:1--5:34",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325885",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325885",
  abstract =     "This article presents a comprehensive study on two
                 primary tasks in Burmese (Myanmar) morphological
                 analysis: tokenization and part-of-speech (POS)
                 tagging. Twenty thousand Burmese sentences of newswire
                 are annotated with two-layer tokenization and
                 POS-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2020:AMC,
  author =       "Dayiheng Liu and Kexin Yang and Qian Qu and Jiancheng
                 Lv",
  title =        "Ancient--Modern {Chinese} Translation with a New Large
                 Training Dataset",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "6:1--6:13",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325887",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3325887",
  abstract =     "Ancient Chinese brings the wisdom and spirit culture
                 of the Chinese nation. Automatic translation from
                 ancient Chinese to modern Chinese helps to inherit and
                 carry forward the quintessence of the ancients.
                 However, the lack of large-scale parallel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2020:CSP,
  author =       "Wei Wang and Degen Huang and Jingxiang Cao",
  title =        "{Chinese} Syntax Parsing Based on Sliding Match of
                 Semantic String",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "7:1--7:14",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329707",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329707",
  abstract =     "Different from the current syntax parsing based on
                 deep learning, we present a novel Chinese parsing
                 method, which is based on Sliding Match of Semantic
                 String (SMOSS). (1) Training stage: In a treebank,
                 headwords of tree nodes are represented by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kanwal:2020:UNE,
  author =       "Safia Kanwal and Kamran Malik and Khurram Shahzad and
                 Faisal Aslam and Zubair Nawaz",
  title =        "{Urdu} Named Entity Recognition: Corpus Generation and
                 Deep Learning Applications",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "8:1--8:13",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329710",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329710",
  abstract =     "Named Entity Recognition (NER) plays a pivotal role in
                 various natural language processing tasks, such as
                 machine translation and automatic question-answering
                 systems. Recognizing the importance of NER, a plethora
                 of NER techniques for Western and Asian \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 9.  The pages
%%% value is written in the articleno:page form (9:1--9:17), matching the
%%% form used by the volume 19, number 2 entries of this file.
@Article{Liu:2020:DCW,
  author =       "Yijia Liu and Wanxiang Che and Yuxuan Wang and Bo
                 Zheng and Bing Qin and Ting Liu",
  title =        "Deep Contextualized Word Embeddings for Universal
                 Dependency Parsing",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "9:1--9:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3326497",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3326497",
  abstract =     "Deep contextualized word embeddings (Embeddings from
                 Language Model, short for ELMo), as an emerging and
                 effective replacement for the static word embeddings,
                 have achieved success on a bunch of syntactic and
                 semantic NLP problems. However, little is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 10.  The pages
%%% value is written in the articleno:page form (10:1--10:15), matching
%%% the form used by the volume 19, number 2 entries of this file.  The
%%% title dash is set off by a space on both sides.
@Article{Mehmood:2020:SAR,
  author =       "Khawar Mehmood and Daryl Essam and Kamran Shafi and
                 Muhammad Kamran Malik",
  title =        "Sentiment Analysis for a Resource Poor Language ---
                 {Roman Urdu}",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "10:1--10:15",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329709",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329709",
  abstract =     "Sentiment analysis is an important sub-task of Natural
                 Language Processing that aims to determine the polarity
                 of a review. Most of the work done on sentiment
                 analysis is for the resource-rich languages of the
                 world, but very limited work has been done \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 11.  The pages
%%% value is written in the articleno:page form (11:1--11:29), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Bakhshaei:2020:MGM,
  author =       "Somayeh Bakhshaei and Reza Safabakhsh and Shahram
                 Khadivi",
  title =        "Matching Graph, a Method for Extracting Parallel
                 Information from Comparable Corpora",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "11:1--11:29",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329713",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3329713",
  abstract =     "Comparable corpora are valuable alternatives for the
                 expensive parallel corpora. They comprise informative
                 parallel fragments that are useful resources for
                 different natural language processing tasks. In this
                 work, a generative model is proposed for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 12.  The pages
%%% value is written in the articleno:page form (12:1--12:17), matching
%%% the form used by the volume 19, number 2 entries of this file.  The
%%% math in the title is brace-protected so that bibliography styles
%%% that change title case leave the \mu control sequence untouched.
@Article{Liu:2020:FTV,
  author =       "Dayiheng Liu and Yang Xue and Feng He and Yuanyuan
                 Chen and Jiancheng Lv",
  title =        "{$ \mu $}-Forcing: Training Variational Recurrent
                 Autoencoders for Text Generation",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "12:1--12:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3341110",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3341110",
  abstract =     "It has been previously observed that training
                 Variational Recurrent Autoencoders (VRAE) for text
                 generation suffers from serious uninformative latent
                 variables problems. The model would collapse into a
                 plain language model that totally ignores the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 13.  The pages
%%% value is written in the articleno:page form (13:1--13:19), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Srivastava:2020:AMA,
  author =       "Jyoti Srivastava and Sudip Sanyal and Ashish Kumar
                 Srivastava",
  title =        "An Automatic and a Machine-assisted Method to Clean
                 Bilingual Corpus",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "13:1--13:19",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342351",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342351",
  abstract =     "Two different methods of corpus cleaning are presented
                 in this article. One is a machine-assisted technique,
                 which is good to clean small-sized parallel corpus, and
                 the other is an automatic method, which is suitable for
                 cleaning large-sized parallel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 14.  The pages
%%% value is written in the articleno:page form (14:1--14:26), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Prakash:2020:ISP,
  author =       "Jeena J. Prakash and Golda Brunet Rajan and Hema A.
                 Murthy",
  title =        "Importance of Signal Processing Cues in Transcription
                 Correction for Low-Resource {Indian} Languages",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "14:1--14:26",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342352",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342352",
  abstract =     "Accurate phonetic transcriptions are crucial for
                 building robust acoustic models for speech recognition
                 as well as speech synthesis applications. Phonetic
                 transcriptions are not usually provided with speech
                 corpora. A lexicon is used to generate phone-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 15.  The pages
%%% value is written in the articleno:page form (15:1--15:17), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Han:2020:EMW,
  author =       "Dong Han and Junhui Li and Yachao Li and Min Zhang and
                 Guodong Zhou",
  title =        "Explicitly Modeling Word Translations in Neural
                 Machine Translation",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "15:1--15:17",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342353",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342353",
  abstract =     "In this article, we show that word translations can be
                 explicitly incorporated into NMT effectively to avoid
                 wrong translations. Specifically, we propose three
                 cross-lingual encoders to explicitly incorporate word
                 translations into NMT: (1) Factored\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 16.  The pages
%%% value is written in the articleno:page form (16:1--16:19), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Chakrabarty:2020:NNM,
  author =       "Abhisek Chakrabarty and Akshay Chaturvedi and Utpal
                 Garain",
  title =        "{NeuMorph}: Neural Morphological Tagging for
                 Low-Resource Languages --- an Experimental Study for
                 {Indic} Languages",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "16:1--16:19",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342354",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342354",
  abstract =     "This article deals with morphological tagging for
                 low-resource languages. For this purpose, five Indic
                 languages are taken as reference. In addition, two
                 severely resource-poor languages, Coptic and Kurmanji,
                 are also considered. The task entails \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 1 (January 2020), article 17.  The pages
%%% value is written in the articleno:page form (17:1--17:12), matching
%%% the form used by the volume 19, number 2 entries of this file.
@Article{Ji:2020:ATU,
  author =       "Yatu Ji and Hongxu Hou and Junjie Chen and Nier Wu",
  title =        "Adversarial Training for Unknown Word Problems in
                 Neural Machine Translation",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "1",
  pages =        "17:1--17:12",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342482",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jan 10 08:11:41 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3342482",
  abstract =     "Nearly all of the work in neural machine translation
                 (NMT) is limited to a quite restricted vocabulary,
                 crudely treating all other words the same as an unk
                 symbol. For the translation of language with abundant
                 morphology, unknown (UNK) words also \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 2 (March 2020), article 18: order-sensitive
%%% keyword-based response generation for open-domain conversation.
@article{Zhu:2020:OSK,
  author = {Qingfu Zhu and Weinan Zhang and Lei Cui and Ting Liu},
  title = {Order-Sensitive Keywords Based Response Generation in
    Open-Domain Conversational Systems},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {18},
  pages = {18:1--18:18},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3343258},
  url = {https://dl.acm.org/doi/abs/10.1145/3343258},
  abstract = {External keywords are crucial for response generation
    models to address the generic response problems in
    open-domain conversational systems. The occurrence of
    keywords in a response depends heavily on the order of
    the keywords as they are generated \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 19: neural
%%% conversation generation with auxiliary emotional supervised models.
@article{Zhou:2020:NCG,
  author = {Guangyou Zhou and Yizhen Fang and Yehong Peng and
    Jiaheng Lu},
  title = {Neural Conversation Generation with Auxiliary
    Emotional Supervised Models},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {19},
  pages = {19:1--19:17},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3344788},
  url = {https://dl.acm.org/doi/abs/10.1145/3344788},
  abstract = {An important aspect of developing dialogue agents
    involves endowing a conversation system with emotion
    perception and interaction. Most existing emotion
    dialogue models lack the adaptability and extensibility
    of different scenes because of their \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 20: double-carrier
%%% word embedding that draws on both phonetics and writing.
@article{Zhu:2020:EDC,
  author = {Wenhao Zhu and Xin Jin and Shuang Liu and Zhiguo Lu
    and Wu Zhang and Ke Yan and Baogang Wei},
  title = {Enhanced Double-Carrier Word Embedding via Phonetics
    and Writing},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {20},
  pages = {20:1--20:18},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3344920},
  url = {https://dl.acm.org/doi/abs/10.1145/3344920},
  abstract = {Word embeddings, which map words into a unified vector
    space, capture rich semantic information. From a
    linguistic point of view, words have two carriers,
    speech and writing. Yet the most recent word embedding
    models focus on only the writing carrier \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 21: SentiFars, a
%%% Persian polarity lexicon for sentiment analysis.
@article{Dehkharghani:2020:SPP,
  author = {Rahim Dehkharghani},
  title = {{SentiFars}: a {Persian} Polarity Lexicon for
    Sentiment Analysis},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {21},
  pages = {21:1--21:12},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3345627},
  url = {https://dl.acm.org/doi/abs/10.1145/3345627},
  abstract = {There is no doubt about the usefulness of public
    opinion toward different issues in social media and the
    World Wide Web. Extracting the feelings of people about
    an issue from text is not straightforward. Polarity
    lexicons that assign polarity tags or \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 22: Wasf-Vec,
%%% topology-based word embedding for Modern Standard Arabic and the
%%% Iraqi dialect.
@article{Abdulhameed:2020:WVT,
  author = {Tiba Zaki Abdulhameed and Imed Zitouni and Ikhlas
    Abdel-Qader},
  title = {{Wasf-Vec}: Topology-based Word Embedding for Modern
    Standard {Arabic} and {Iraqi} Dialect Ontology},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {22},
  pages = {22:1--22:27},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3345517},
  url = {https://dl.acm.org/doi/abs/10.1145/3345517},
  abstract = {Word clustering is a serious challenge in low-resource
    languages. Since words that share semantics are
    expected to be clustered together, it is common to use
    a feature vector representation generated from a
    distributional theory-based word embedding \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 23: extraction of
%%% polarity shifting patterns from corpora using natural annotation.
@article{Xu:2020:EPS,
  author = {Ge Xu and Xiaoyan Yang and Yuanzheng Cai and Zhiqiang
    Ruan and Tao Wang and Xiangwen Liao},
  title = {Extracting Polarity Shifting Patterns from Any Corpus
    Based on Natural Annotation},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {23},
  pages = {23:1--23:16},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3345518},
  url = {https://dl.acm.org/doi/abs/10.1145/3345518},
  abstract = {In recent years, online sentiment texts are generated
    by users in various domains and in different languages.
    Binary polarity classification (positive or negative)
    on business sentiment texts can help both companies and
    customers to evaluate products or \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 24: filtered
%%% pseudo-parallel corpora for low-resource neural machine translation.
@article{Imankulova:2020:FPP,
  author = {Aizhan Imankulova and Takayuki Sato and Mamoru
    Komachi},
  title = {Filtered Pseudo-parallel Corpus Improves Low-resource
    Neural Machine Translation},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {24},
  pages = {24:1--24:16},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3341726},
  url = {https://dl.acm.org/doi/abs/10.1145/3341726},
  abstract = {Large-scale parallel corpora are essential for
    training high-quality machine translation systems;
    however, such corpora are not freely available for many
    language translation pairs. Previously, training data
    has been augmented by pseudo-parallel corpora
    \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 25: a deep neural
%%% network framework for English and Hindi question answering.
@article{Gupta:2020:DNN,
  author = {Deepak Gupta and Asif Ekbal and Pushpak
    Bhattacharyya},
  title = {A Deep Neural Network Framework for {English} {Hindi}
    Question Answering},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {25},
  pages = {25:1--25:22},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3359988},
  url = {https://dl.acm.org/doi/abs/10.1145/3359988},
  abstract = {In this article, we propose a unified deep neural
    network framework for multilingual question answering
    (QA). The proposed network deals with the multilingual
    questions and answers snippets. The input to the
    network is a pair of factoid question and \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 26: layer-wise
%%% de-training and re-training for ConvS2S machine translation.
@article{Yu:2020:LWT,
  author = {Hongfei Yu and Xiaoqing Zhou and Xiangyu Duan and Min
    Zhang},
  title = {Layer-Wise De-Training and Re-Training for {ConvS2S}
    Machine Translation},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {26},
  pages = {26:1--26:15},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3358414},
  url = {https://dl.acm.org/doi/abs/10.1145/3358414},
  abstract = {The convolutional sequence-to-sequence (ConvS2S)
    machine translation system is one of the typical neural
    machine translation (NMT) systems. Training the ConvS2S
    model tends to get stuck in a local optimum in our
    pre-studies. To overcome this inferior \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 27: statistical
%%% Isarn Dharma word segmentation with named entity recognition.
@article{Somsap:2020:IDW,
  author = {Sittichai Somsap and Pusadee Seresangtakul},
  title = {{Isarn Dharma} Word Segmentation Using a Statistical
    Approach with Named Entity Recognition},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {27},
  pages = {27:1--27:16},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3359990},
  url = {https://dl.acm.org/doi/abs/10.1145/3359990},
  abstract = {In this study, we developed an Isarn Dharma word
    segmentation system. We mainly focused on solving the
    word ambiguity and unknown word problems in unsegmented
    Isarn Dharma text. Ambiguous Isarn Dharma words occur
    frequently in word construction due to \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 28.  Author names
%%% are recorded without honorifics: a leading title such as "Dr." would
%%% be parsed by BibTeX as part of the given name and then be printed
%%% and abbreviated as if it were a forename.
@Article{Abbas:2020:PIR,
  author =       "Muhammad Raihan Abbas and Khadim Hussain Asif",
  title =        "{Punjabi} to {ISO 15919} and {Roman} Transliteration
                 with Phonetic Rectification",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "2",
  pages =        "28:1--28:20",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3359991",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Mar 3 09:05:40 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3359991",
  abstract =     "Transliteration removes the script barriers.
                 Unfortunately, Punjabi is written in four different
                 scripts, i.e., Gurmukhi, Shahmukhi, Devnagri, and
                 Latin. The Latin script is understandable for nearly
                 all factions of the Punjabi community. The objective
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% TALLIP volume 19, number 2 (March 2020), article 29: a subword
%%% attentive deep learning model for Arabic sentiment analysis.
@article{Beseiso:2020:SAM,
  author = {Majdi Beseiso and Haytham Elmousalami},
  title = {Subword Attentive Model for {Arabic} Sentiment
    Analysis: a Deep Learning Approach},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {29},
  pages = {29:1--29:17},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3360016},
  url = {https://dl.acm.org/doi/abs/10.1145/3360016},
  abstract = {Social media data is unstructured data where these big
    data are exponentially increasing day to day in many
    different disciplines. Analysis and understanding the
    semantics of these data are a big challenge due to its
    variety and huge volume. To address \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 30: classification
%%% of children's stories in Indian languages from linguistic and
%%% keyword-based features.
@article{Harikrishna:2020:CSC,
  author = {D. M. Harikrishna and K. Sreenivasa Rao},
  title = {{Children}'s Story Classification in {Indian}
    Languages Using Linguistic and Keyword-based Features},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {30},
  pages = {30:1--30:22},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3342356},
  url = {https://dl.acm.org/doi/abs/10.1145/3342356},
  abstract = {The primary objective of this work is to classify
    Hindi and Telugu stories into three genres: fable,
    folk-tale, and legend. In this work, we are proposing a
    framework for story classification (SC) using keyword
    and part-of-speech (POS) features. For \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

%%% TALLIP volume 19, number 2 (March 2020), article 31: word reordering
%%% for translation into Korean Sign Language using syntactically-guided
%%% classification.
@article{Jung:2020:WRT,
  author = {Hun-Young Jung and Jong-Hyeok Lee and Eunju Min and
    Seung-Hoon Na},
  title = {Word Reordering for Translation into {Korean} Sign
    Language Using Syntactically-guided Classification},
  journal = j-TALLIP,
  ajournal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal = {ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)},
  journal-url = {https://dl.acm.org/loi/tallip},
  volume = {19},
  number = {2},
  articleno = {31},
  pages = {31:1--31:20},
  month = mar,
  year = {2020},
  coden = {????},
  issn = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l = {2375-4699},
  doi = {https://doi.org/10.1145/3357612},
  url = {https://dl.acm.org/doi/abs/10.1145/3357612},
  abstract = {Machine translation aims to break the language barrier
    that prevents communication with others and increase
    access to information. Deaf people face huge language
    barriers in their daily lives, including access to
    digital and spoken information. There \ldots{}},
  bibdate = {Tue Mar 3 09:05:40 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  acknowledgement = ack-nhfb
}

@article{Masmoudi:2020:TAA,
  author          = {Abir Masmoudi and Mariem Ellouze Khmekhem and
                     Mourad Khrouf and Lamia Hadrich Belguith},
  title           = {Transliteration of {Arabizi} into {Arabic} Script
                     for {Tunisian} Dialect},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {2},
  articleno       = {32},
  pages           = {32:1--32:21},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3364319},
  url             = {https://dl.acm.org/doi/abs/10.1145/3364319},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {The evolution of information and communication
                     technology has markedly influenced communication
                     between correspondents. This evolution has
                     facilitated the transmission of information and
                     has engendered new forms of written communication
                     (email, chat, \ldots{})},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Mukherjee:2020:FST,
  author          = {Subham Mukherjee and Pradeep Kumar and Partha
                     Pratim Roy},
  title           = {Fusion of Spatio-temporal Information for {Indic}
                     Word Recognition Combining Online and Offline
                     Text Data},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {2},
  articleno       = {33},
  pages           = {33:1--33:24},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3364533},
  url             = {https://dl.acm.org/doi/abs/10.1145/3364533},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {We present a novel Indic handwritten word
                     recognition scheme by fusion of spatio-temporal
                     information extracted from handwritten images.
                     The main challenge in Indic word recognition lies
                     in its complexity because of modifiers, touching
                     characters, and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:05:40 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Yu:2020:ELR,
  author          = {Zhiqiang Yu and Zhengtao Yu and Junjun Guo and
                     Yuxin Huang and Yonghua Wen},
  title           = {Efficient Low-Resource Neural Machine Translation
                     with Reread and Feedback Mechanism},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {34},
  pages           = {34:1--34:13},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3365244},
  url             = {https://dl.acm.org/doi/abs/10.1145/3365244},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {How to utilize information sufficiently is a key
                     problem in neural machine translation (NMT),
                     which is effectively improved in rich-resource
                     NMT by leveraging large-scale bilingual sentence
                     pairs. However, for low-resource NMT, lack of
                     bilingual \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Park:2020:NSB,
  author          = {Cheoneum Park and Heejun Song and Changki Lee},
  title           = {{$ S^3$-NET}: {SRU}-Based Sentence and
                     Self-Matching Networks for Machine Reading
                     Comprehension},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {35},
  pages           = {35:1--35:14},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3365679},
  url             = {https://dl.acm.org/doi/abs/10.1145/3365679},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Machine reading comprehension question answering
                     (MRC-QA) is the task of understanding the context
                     of a given passage to find a correct answer
                     within it. A passage is composed of several
                     sentences; therefore, the length of the input
                     sentence becomes \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@Article{Sarwar:2020:SSF,
  author =       "Raheem Sarwar and Thanasarn Porthaveepong and Attapol
                 Rutherford and Thanawin Rakthanmanon and Sarana
                 Nutanong",
  title =        "{StyloThai}: a Scalable Framework for Stylometric
                 Authorship Identification of {Thai} Documents",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "3",
  pages =        "36:1--36:15",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365832",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Mar 3 09:11:26 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3365832",
  abstract =     "Authorship identification helps to identify the true
                 author of a given anonymous document from a set of
                 candidate authors. The applications of this task can be
                 found in several domains, such as law enforcement
                 agencies and information retrieval. These \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Kim:2020:UIB,
  author          = {Hyun Kim and Seung-Hoon Na},
  title           = {Uniformly Interpolated Balancing for Robust
                     Prediction in Translation Quality Estimation: a
                     Case Study of {English--Korean} Translation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {37},
  pages           = {37:1--37:27},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3365916},
  url             = {https://dl.acm.org/doi/abs/10.1145/3365916},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {There has been growing interest among researchers
                     in quality estimation (QE), which attempts to
                     automatically predict the quality of machine
                     translation (MT) outputs. Most existing works on
                     QE are based on supervised approaches using
                     quality-annotated \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@Article{Zhou:2020:LMU,
  author =       "Xiao Zhou and Zhen-Hua Ling and Li-Rong Dai",
  title =        "Learning and Modeling Unit Embeddings Using Deep
                 Neural Networks for Unit-Selection-Based {Mandarin}
                 Speech Synthesis",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "3",
  pages =        "38:1--38:14",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3372244",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Mar 3 09:11:26 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372244",
  abstract =     "A method of learning and modeling unit embeddings
                 using deep neural networks (DNNs) is presented in this
                 article for unit-selection-based Mandarin speech
                 synthesis. Here, a unit embedding is defined as a
                 fixed-length embedding vector for a phone-sized
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Mirzaei:2020:SRL,
  author          = {Azadeh Mirzaei and Fatemeh Sedghi and Pegah
                     Safari},
  title           = {Semantic Role Labeling System for {Persian}
                     Language},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {39},
  pages           = {39:1--39:12},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3372246},
  url             = {https://dl.acm.org/doi/abs/10.1145/3372246},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In this article, we present an automatic semantic
                     role labeling system in Persian consisting of two
                     modules: argument identification for specifying
                     argument spans and argument classification for
                     categorizing their semantic roles. Our modules
                     have been \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Ding:2020:BMT,
  author          = {Chenchen Ding and Sann Su Su Yee and Win Pa Pa
                     and Khin Mar Soe and Masao Utiyama and Eiichiro
                     Sumita},
  title           = {A {Burmese} ({Myanmar}) {Treebank}: Guideline and
                     Analysis},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {40},
  pages           = {40:1--40:13},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3373268},
  url             = {https://dl.acm.org/doi/abs/10.1145/3373268},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {A 20,000-sentence Burmese (Myanmar) treebank on
                     news articles has been released under a CC
                     BY-NC-SA license. Complete phrase structure
                     annotation was developed for each sentence from
                     the morphologically annotated data prepared in
                     previous work of Ding \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Song:2020:KPS,
  author          = {Hyun-Je Song and Seong-Bae Park},
  title           = {{Korean} Part-of-speech Tagging Based on Morpheme
                     Generation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {41},
  pages           = {41:1--41:10},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3373608},
  url             = {https://dl.acm.org/doi/abs/10.1145/3373608},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Two major problems of Korean part-of-speech (POS)
                     tagging are that the word-spacing unit is not
                     mapped one-to-one to a POS tag and that morphemes
                     should be recovered during POS tagging.
                     Therefore, this article proposes a novel two-step
                     Korean POS tagger \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Mi:2020:LIL,
  author          = {Chenggang Mi and Lei Xie and Yanning Zhang},
  title           = {Loanword Identification in Low-Resource Languages
                     with Minimal Supervision},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {43},
  pages           = {43:1--43:22},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3374212},
  url             = {https://dl.acm.org/doi/abs/10.1145/3374212},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Bilingual resources play a very important role in
                     many natural language processing tasks,
                     especially the tasks in cross-lingual scenarios.
                     However, it is expensive and time consuming to
                     build such resources. Lexical borrowing happens
                     in almost every \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Li:2020:INM,
  author          = {Yachao Li and Junhui Li and Min Zhang and Yixin
                     Li and Peng Zou},
  title           = {Improving Neural Machine Translation with Linear
                     Interpolation of a Short-Path Unit},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {44},
  pages           = {44:1--44:16},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377851},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377851},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In neural machine translation (NMT), the source
                     and target words are at the two ends of a large
                     deep neural network, normally mediated by a
                     series of non-linear activations. The problem
                     with such consequent non-linear activations is
                     that they \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Liu:2020:DUK,
  author          = {Xiao-Yang Liu and Yimeng Zhang and Yukang Liao
                     and Ling Jiang},
  title           = {Dynamic Updating of the Knowledge Base for a
                     Large-Scale Question Answering System},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {45},
  pages           = {45:1--45:13},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377708},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377708},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Today, the knowledge base question answering
                     (KB-QA) system is promising to achieve a
                     large-scale high-quality reply in the e-commerce
                     industry. However, there exist two major
                     challenges to efficiently support large-scale
                     KB-QA systems. On the one hand, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Liu:2020:ELM,
  author          = {Shih-Hung Liu and Kuan-Yu Chen and Berlin Chen},
  title           = {Enhanced Language Modeling with Proximity and
                     Sentence Relatedness Information for Extractive
                     Broadcast News Summarization},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {46},
  pages           = {46:1--46:19},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377407},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377407},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {The primary task of extractive summarization is
                     to automatically select a set of representative
                     sentences from a text or spoken document that can
                     concisely express the most important theme of the
                     original document. Recently, language modeling
                     (LM) has \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Du:2020:CNL,
  author          = {Qianlong Du and Chengqing Zong and Keh-Yih Su},
  title           = {Conducting Natural Language Inference with
                     Word-Pair-Dependency and Local Context},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {3},
  articleno       = {47},
  pages           = {47:1--47:23},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377704},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377704},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {This article proposes to conduct natural language
                     inference with novel
                     Enhanced-Relation-Head-Dependent triplets (RHD
                     triplets), which are constructed via enhancing
                     each word in the RHD triplet with its associated
                     local context. Most previous approaches
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 3 09:11:26 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Zitouni:2020:ENE,
  author          = {Imed Zitouni},
  title           = {Editorial from the New {Editor-in-Chief}: the Era
                     of Natural Language Processing Innovations on
                     {Asian} and Low-Resource Languages},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {48e},
  pages           = {48e:1--48e:2},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3397501},
  url             = {https://dl.acm.org/doi/abs/10.1145/3397501},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Yang:2020:OEQ,
  author          = {Jingxuan Yang and Haotian Cui and Si Li and Sheng
                     Gao and Jun Guo and Zhengdong Lu},
  title           = {Outline Extraction with Question-Specific Memory
                     Cells},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {48},
  pages           = {48:1--48:17},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377707},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377707},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Outline extraction has been widely applied in
                     online consultation to help experts quickly
                     understand individual cases. Given a specific
                     case described as unstructured plain text,
                     outline extraction aims to make a summary for
                     this case by answering a set \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Zarnoufi:2020:MNB,
  author          = {Randa Zarnoufi and Hamid Jaafar and Mounia Abik},
  title           = {Machine Normalization: Bringing Social Media Text
                     from Non-Standard to Standard Form},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {49},
  pages           = {49:1--49:30},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3378414},
  url             = {https://dl.acm.org/doi/abs/10.1145/3378414},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {User-generated text in social media communication
                     (SMC) is mainly characterized by non-standard
                     form. It may contain code switching (CS) text, a
                     widespread phenomenon in SMC, in addition to
                     noisy elements used, especially in written
                     conversations (use \ldots{})},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Bhattu:2020:ICM,
  author          = {S. Nagesh Bhattu and Satya Krishna Nunna and D.
                     V. L. N. Somayajulu and Binay Pradhan},
  title           = {Improving Code-mixed {POS} Tagging Using
                     Code-mixed Embeddings},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {50},
  pages           = {50:1--50:31},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3380967},
  url             = {https://dl.acm.org/doi/abs/10.1145/3380967},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Social media data has become invaluable component
                     of business analytics. A multitude of nuances of
                     social media text make the job of conventional
                     text analytical tools difficult. Code-mixing of
                     text is a phenomenon prevalent among social media
                     users, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Ahmad:2020:NER,
  author          = {Muhammad Tayyab Ahmad and Muhammad Kamran Malik
                     and Khurram Shahzad and Faisal Aslam and Asif
                     Iqbal and Zubair Nawaz and Faisal Bukhari},
  title           = {Named Entity Recognition and Classification for
                     {Punjabi Shahmukhi}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {51},
  pages           = {51:1--51:13},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3383306},
  url             = {https://dl.acm.org/doi/abs/10.1145/3383306},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Named entity recognition (NER) refers to the
                     identification of proper nouns from natural
                     language text and classifying them into named
                     entity types, such as person, location, and
                     organization. Due to the widespread applications
                     of NER, numerous NER \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Garg:2020:HES,
  author          = {Kanika Garg and D. K. Lobiyal},
  title           = {{Hindi EmotionNet}: a Scalable Emotion Lexicon
                     for Sentiment Classification of {Hindi} Text},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {52},
  pages           = {52:1--52:35},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3383330},
  url             = {https://dl.acm.org/doi/abs/10.1145/3383330},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In this study, we create an emotion lexicon for
                     the Hindi language called Hindi EmotionNet. It
                     can assign emotional affinity to words in
                     IndoWordNet. This lexicon contains 3,839 emotion
                     words, with 1,246 positive and 2,399 negative
                     words. We also \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Udomcharoenchaikit:2020:AER,
  author          = {Can Udomcharoenchaikit and Prachya Boonkwan and
                     Peerapon Vateekul},
  title           = {Adversarial Evaluation of Robust Neural
                     Sequential Tagging Methods for {Thai} Language},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {53},
  pages           = {53:1--53:25},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3383201},
  url             = {https://dl.acm.org/doi/abs/10.1145/3383201},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Sequential tagging tasks, such as Part-Of-Speech
                     (POS) tagging and Named-Entity Recognition, are
                     the building blocks of many natural language
                     processing applications. Although prior works
                     have reported promising results in standard
                     settings, they often \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@article{Sugandhi:2020:SLG,
  author          = {Sugandhi and Parteek Kumar and Sanmeet Kaur},
  title           = {Sign Language Generation System Based on {Indian}
                     Sign Language Grammar},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource
                     Language Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume          = {19},
  number          = {4},
  articleno       = {54},
  pages           = {54:1--54:26},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3384202},
  url             = {https://dl.acm.org/doi/abs/10.1145/3384202},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Sign Language (SL), also known as gesture-based
                     language, is used by people with hearing loss to
                     convey their messages. SL interpreters are
                     required for people who do not have the knowledge
                     of SL, but interpreters are not readily
                     available. Thus, a \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Jul 8 18:31:46 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib}
}

@Article{Sarwar:2020:NLI,
  author =       {Raheem Sarwar and Attapol T. Rutherford and Saeed-Ul
                 Hassan and Thanawin Rakthanmanon and Sarana Nutanong},
  title =        {Native Language Identification of Fluent and Advanced
                 Non-Native Writers},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {55},
  pages =        {55:1--55:19},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3383202},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3383202},
  abstract =     {Native Language Identification (NLI) aims at
                 identifying the native languages of authors by
                 analyzing their text samples written in a non-native
                 language. Most existing studies investigate this task
                 for educational applications such as second language
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Buyuk:2020:CDS,
  author =       {Osman B{\"u}y{\"u}k},
  title =        {Context-Dependent Sequence-to-Sequence {Turkish}
                 Spelling Correction},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {56},
  pages =        {56:1--56:16},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3383200},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3383200},
  abstract =     {In this article, we make use of sequence-to-sequence
                 (seq2seq) models for spelling correction in the
                 agglutinative Turkish language. In the baseline system,
                 misspelled and target words are split into their
                 letters and the letter sequences are fed into
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Khalil:2020:EAC,
  author =       {Hussein Khalil and Taha Osman and Mohammed Miltan},
  title =        {Extracting {Arabic} Composite Names Using Genitive
                 Principles of {Arabic} Grammar},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {57},
  pages =        {57:1--57:16},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3382187},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3382187},
  abstract =     {Named Entity Recognition (NER) is a basic prerequisite
                 of using Natural Language Processing (NLP) for
                 information retrieval. Arabic NER is especially
                 challenging as the language is morphologically rich and
                 has short vowels with no capitalisation \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Wang:2020:SCH,
  author =       {Kexin Wang and Yu Zhou and Jiajun Zhang and Shaonan
                 Wang and Chengqing Zong},
  title =        {Structurally Comparative Hinge Loss for
                 Dependency-Based Neural Text Representation},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {58},
  pages =        {58:1--58:19},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3387633},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3387633},
  abstract =     {Dependency-based graph convolutional networks
                 (DepGCNs) are proven helpful for text representation to
                 handle many natural language tasks. Almost all previous
                 models are trained with cross-entropy (CE) loss, which
                 maximizes the posterior likelihood \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Liu:2020:JME,
  author =       {Maofu Liu and Yukun Zhang and Wenjie Li and Donghong
                 Ji},
  title =        {Joint Model of Entity Recognition and Relation
                 Extraction with Self-attention Mechanism},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {59},
  pages =        {59:1--59:19},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3387634},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3387634},
  abstract =     {In recent years, the joint model of entity recognition
                 (ER) and relation extraction (RE) has attracted more
                 and more attention in the healthcare and medical
                 domains. However, there are some problems with the
                 prior work. The joint model cannot extract \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Kumar:2020:LGV,
  author =       {H. R. Shiva Kumar and A. G. Ramakrishnan},
  title =        {{Lipi Gnani}: a Versatile {OCR} for Documents in any
                 Language Printed in {Kannada} Script},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {4},
  articleno =    {60},
  pages =        {60:1--60:23},
  month =        jul,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3387632},
  URL =          {https://dl.acm.org/doi/abs/10.1145/3387632},
  abstract =     {A Kannada OCR, called Lipi Gnani, has been designed
                 and developed from scratch, with the motivation of it
                 being able to convert printed text or poetry in Kannada
                 script, without any restriction on vocabulary. The
                 training and test sets have been \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Wed Jul 8 18:31:46 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Hao:2020:CST,
  author =       {Ming Hao and Bo Xu and Jing-Yi Liang and Bo-Wen Zhang
                 and Xu-Cheng Yin},
  title =        {{Chinese} Short Text Classification with
                 Mutual-Attention Convolutional Neural Networks},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {61},
  pages =        {61:1--61:13},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3388970},
  URL =          {https://dl.acm.org/doi/10.1145/3388970},
  abstract =     {The methods based on the combination of word-level and
                 character-level features can effectively boost
                 performance on Chinese short text classification. A lot
                 of works concatenate two-level features with little
                 processing, which leads to losing feature \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Xu:2020:SDE,
  author =       {Fan Xu and Jian Luo and Mingwen Wang and Guodong
                 Zhou},
  title =        {Speech-Driven End-to-End Language Discrimination
                 toward {Chinese} Dialects},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {62},
  pages =        {62:1--62:24},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3389021},
  URL =          {https://dl.acm.org/doi/10.1145/3389021},
  abstract =     {Language discrimination among similar languages,
                 varieties, and dialects is a challenging natural
                 language processing task. The traditional text-driven
                 focus leads to poor results. In this article, we
                 explore the effectiveness of speech-driven features
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Chen:2020:IIF,
  author =       {Junjie Chen and Hongxu Hou and Jing Gao},
  title =        {Inside Importance Factors of Graph-Based Keyword
                 Extraction on {Chinese} Short Text},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {63},
  pages =        {63:1--63:15},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3388971},
  URL =          {https://dl.acm.org/doi/10.1145/3388971},
  abstract =     {Keywords are considered to be important words in the
                 text and can provide a concise representation of the
                 text. With the surge of unlabeled short text on the
                 Internet, automatic keyword extraction task has proven
                 useful in other information processing \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Lou:2020:EBS,
  author =       {Yinxia Lou and Yue Zhang and Fei Li and Tao Qian and
                 Donghong Ji},
  title =        {Emoji-Based Sentiment Analysis Using Attention
                 Networks},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {64},
  pages =        {64:1--64:13},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3389035},
  URL =          {https://dl.acm.org/doi/10.1145/3389035},
  abstract =     {Emojis are frequently used to express moods, emotions,
                 and feelings in social media. There has been much
                 research on emojis and sentiments. However, existing
                 methods mainly face two limitations. First, they treat
                 emojis as binary indicator features and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Zhou:2020:DNN,
  author =       {Long Zhou and Jiajun Zhang and Xiaomian Kang and
                 Chengqing Zong},
  title =        {Deep Neural Network--based Machine Translation System
                 Combination},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {65},
  pages =        {65:1--65:19},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3389791},
  URL =          {https://dl.acm.org/doi/10.1145/3389791},
  abstract =     {Deep neural networks (DNNs) have provably enhanced the
                 state-of-the-art natural language process (NLP) with
                 their capability of feature learning and
                 representation. As one of the more challenging NLP
                 tasks, neural machine translation (NMT) becomes a new
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Ameur:2020:RAT,
  author =       {Mohamed Seghir Hadj Ameur and Riadh Belkebir and Ahmed
                 Guessoum},
  title =        {Robust {Arabic} Text Categorization by Combining
                 Convolutional and Recurrent Neural Networks},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {66},
  pages =        {66:1--66:16},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3390092},
  URL =          {https://dl.acm.org/doi/10.1145/3390092},
  abstract =     {Text Categorization is an important task in the area
                 of Natural Language Processing (NLP). Its goal is to
                 learn a model that can accurately classify any textual
                 document for a given language into one of a set of
                 predefined categories. In the context of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Das:2020:SMT,
  author =       {Ayan Das and Sudeshna Sarkar},
  title =        {A Survey of the Model Transfer Approaches to
                 Cross-Lingual Dependency Parsing},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {67},
  pages =        {67:1--67:60},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3383772},
  URL =          {https://dl.acm.org/doi/10.1145/3383772},
  abstract =     {Cross-lingual dependency parsing approaches have been
                 employed to develop dependency parsers for the
                 languages for which little or no treebanks are
                 available using the treebanks of other languages. A
                 language for which the cross-lingual parser is
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Marie:2020:ITU,
  author =       {Benjamin Marie and Atsushi Fujita},
  title =        {Iterative Training of Unsupervised Neural and
                 Statistical Machine Translation Systems},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {68},
  pages =        {68:1--68:21},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3389790},
  URL =          {https://dl.acm.org/doi/10.1145/3389790},
  abstract =     {Recent work achieved remarkable results in training
                 neural machine translation (NMT) systems in a fully
                 unsupervised way, with new and dedicated architectures
                 that only rely on monolingual corpora. However,
                 previous work also showed that unsupervised \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Chimalamarri:2020:MSI,
  author =       {Santwana Chimalamarri and Dinkar Sitaram and Ashritha
                 Jain},
  title =        {Morphological Segmentation to Improve Crosslingual
                 Word Embeddings for Low Resource Languages},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {69},
  pages =        {69:1--69:15},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3390298},
  URL =          {https://dl.acm.org/doi/10.1145/3390298},
  abstract =     {Crosslingual word embeddings developed from multiple
                 parallel corpora help in understanding the
                 relationships between languages and improving the
                 prediction quality of machine translation. However, in
                 low resource languages with complex and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Li:2020:PQA,
  author =       {Ying Li and Jizhou Huang and Miao Fan and Jinyi Lei
                 and Haifeng Wang and Enhong Chen},
  title =        {Personalized Query Auto-Completion for Large-Scale
                 {POI} Search at {Baidu} Maps},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {70},
  pages =        {70:1--70:16},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3394137},
  URL =          {https://dl.acm.org/doi/10.1145/3394137},
  abstract =     {Query auto-completion (QAC) is a featured function
                 that has been widely adopted by many sub-domains of
                 search. It can dramatically reduce the number of typed
                 characters and avoid spelling mistakes. These merits of
                 QAC are highlighted to improve user satisfaction,
                 especially when users intend to type in a query on
                 mobile devices. In this article, we will present our
                 industrial solution to the personalized QAC for the
                 point of interest (POI) search at Baidu Maps, a
                 well-known Web mapping service on mobiles in China. The
                 industrial solution makes a good tradeoff between the
                 offline effectiveness of a novel neural learning model
                 that we devised for feature generation and the online
                 efficiency of an off-the-shelf learning to rank (LTR)
                 approach for the real-time suggestion. Besides some
                 practical lessons from how a real-world QAC system is
                 built and deployed in Baidu Maps to facilitate a large
                 number of users in searching tens of millions of POIs,
                 we mainly explore two specific features for the
                 personalized QAC function of the POI search engine: the
                 spatial-temporal characteristics of POIs and the
                 historically queried POIs of individual users.\par

                 We leverage the large-volume POI search logs in Baidu
                 Maps to conduct offline evaluations of our personalized
                 QAC model measured by multiple metrics, including Mean
                 Reciprocal Rank (MRR), Success Rate (SR), and
                 normalized Discounted Cumulative Gain (nDCG). Extensive
                 experimental results demonstrate that the personalized
                 model enhanced by the proposed features can achieve
                 substantial improvements (i.e., +3.29\% MRR, +3.78\%
                 SR@1, +5.17\% SR@3, +1.96\% SR@5, and +3.62\% nDCG@5).
                 After deploying this upgraded model into the POI search
                 engine at Baidu Maps for A/B testing online, we observe
                 that some other critical indicators, such as the
                 average number of keystrokes and the average typing
                 speed at keystrokes in a QAC session, which are also
                 related to user satisfaction, decrease as well by
                 1.37\% and 1.69\%, respectively. So the conclusion is
                 that the two kinds of features contributed by us are
                 quite helpful in personalized mapping services for
                 industrial practice.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Alkhatib:2020:DLA,
  author =       {Manar Alkhatib and Azza Abdel Monem and Khaled
                 Shaalan},
  title =        {Deep Learning for {Arabic} Error Detection and
                 Correction},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {71},
  pages =        {71:1--71:13},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3373266},
  URL =          {https://dl.acm.org/doi/10.1145/3373266},
  abstract =     {Research on tools for automating the proofreading of
                 Arabic text has received much attention in recent
                 years. There is an increasing demand for applications
                 that can detect and correct Arabic spelling and
                 grammatical errors to improve the quality of Arabic
                 text content and application input. Our review of
                 previous studies indicates that few Arabic
                 spell-checking research efforts appropriately address
                 the detection and correction of ill-formed words that
                 do not conform to the Arabic morphology system. Even
                 fewer systems address the detection and correction of
                 erroneous well-formed Arabic words that are either
                 contextually or semantically inconsistent within the
                 text. We introduce an approach that investigates
                 employing deep neural network technology for error
                 detection in Arabic text. We have developed a
                 systematic framework for spelling and grammar error
                 detection, as well as correction at the word level,
                 based on a bidirectional long short-term memory
                 mechanism and word embedding, in which a polynomial
                 network classifier is at the top of the system. To get
                 conclusive results, we have developed the most
                 significant gold standard annotated corpus to date,
                 containing 15 million fully inflected Arabic words. The
                 data were collected from diverse text sources and
                 genres, in which every erroneous and ill-formed word
                 has been annotated, validated, and manually revised by
                 Arabic specialists. This valuable asset is available
                 for the Arabic natural language processing research
                 community. The experimental results confirm that our
                 proposed system significantly outperforms the
                 performance of Microsoft Word 2013 and Open Office
                 Ayaspell 3.4, which have been used in the literature
                 for evaluating similar research.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/spell.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Orhan:2020:LWV,
  author =       {Umut Orhan and Enis Arslan},
  title =        {Learning Word-vector Quantization: a Case Study in
                 Morphological Disambiguation},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {72},
  pages =        {72:1--72:18},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3397967},
  URL =          {https://dl.acm.org/doi/10.1145/3397967},
  abstract =     {We introduced a new classifier named Learning
                 Word-vector Quantization (LWQ) to solve morphological
                 ambiguities in Turkish, which is an agglutinative
                 language. First, a new and morphologically annotated
                 corpus, and then its datasets are prepared with a
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Dhar:2020:CSC,
  author =       {Ankita Dhar and Himadri Mukherjee and Niladri Sekhar
                 Dash and Kaushik Roy},
  title =        {{CESS} --- a System to Categorize {Bangla} {Web} Text
                 Documents},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {73},
  pages =        {73:1--73:18},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3398070},
  URL =          {https://dl.acm.org/doi/10.1145/3398070},
  abstract =     {Technology has evolved remarkably, which has led to an
                 exponential increase in the availability of digital
                 text documents of disparate domains over the Internet.
                 This makes the retrieval of the information a very much
                 time- and resource-consuming task. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Bai:2020:NCT,
  author =       {Ruirui Bai and Zhongqing Wang and Fang Kong and
                 Shoushan Li and Guodong Zhou},
  title =        {Neural Co-training for Sentiment Classification with
                 Product Attributes},
  journal =      j-TALLIP,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL =  {https://dl.acm.org/loi/tallip},
  volume =       {19},
  number =       {5},
  articleno =    {74},
  pages =        {74:1--74:17},
  month =        aug,
  year =         {2020},
  CODEN =        {????},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  DOI =          {https://doi.org/10.1145/3394113},
  URL =          {https://dl.acm.org/doi/10.1145/3394113},
  abstract =     {Sentiment classification aims to detect polarity from
                 a piece of text. The polarity is usually positive or
                 negative, and the text genre is usually product review.
                 The challenges of sentiment classification are that it
                 is hard to capture semantic of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Aug 28 11:52:49 MDT 2020},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Schmidt:2020:GTC,
  author =       "Dirk Schmidt",
  title =        "Grading {Tibetan} Children's Literature: a Test Case
                 Using the {NLP} Readability Tool {``Dakje''}",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "75:1--75:19",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3392046",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3392046",
  abstract =     "Worldwide, literacy is on the rise. This historically
                 unprecedented surge---especially over the past 200
                 years---has changed nearly everything about the ancient
                 technology of reading. Who reads is changing: Literacy
                 is no longer just for elite, professional \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Habiba:2020:TCN,
  author =       "Rabia Habiba and Muhammad Awais and Muhammad Shoaib",
  title =        "A Technique to Calculate National Happiness Index by
                 Analyzing {Roman Urdu} Messages Posted on Social
                 Media",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "76:1--76:16",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3400712",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3400712",
  abstract =     "National Happiness Index (NHI) is a national indicator
                 of development that estimates the economic and social
                 well-being of the nation's individuals. With the
                 proliferation of the internet, people share a
                 significant amount of data on social media \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2020:EFL,
  author =       {Hao Wang and Qiongxing Tao and Siyuan Du and Xiangfeng
                 Luo},
  title =        {An Extensible Framework of Leveraging Syntactic
                 Skeleton for Semantic Relation Classification},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {77:1--77:21},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3402885},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3402885},
  abstract =     {Relation classification is one of the most fundamental
                 upstream tasks in natural language processing and
                 information extraction. State-of-the-art approaches
                 make use of various deep neural networks (DNNs) to
                 extract higher-level features directly. They \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {77},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Malhas:2020:ABR,
  author =       {Rana Malhas and Tamer Elsayed},
  title =        {{AyaTEC}: Building a Reusable Verse-Based Test
                 Collection for {Arabic} Question Answering on the {Holy
                 Qur'an}},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {78:1--78:21},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3400396},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3400396},
  abstract =     {The absence of publicly available reusable test
                 collections for Arabic question answering on the Holy
                 Qur'an has impeded the possibility of fairly comparing
                 the performance of systems in that domain. In this
                 article, we introduce AyaTEC, a reusable test
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {78},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Ruan:2020:CTV,
  author =       "Yu-Ping Ruan and Zhen-Hua Ling and Xiaodan Zhu",
  title =        "Condition-Transforming Variational Autoencoder for
                 Generating Diverse Short Text Conversations",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "79:1--79:13",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3402884",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3402884",
  abstract =     "In this article, conditional-transforming variational
                 autoencoders (CTVAEs) are proposed for generating
                 diverse short text conversations. In conditional
                 variational autoencoders (CVAEs), the prior
                 distribution of latent variable $z$ follows a
                 multivariate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Badaro:2020:LPA,
  author =       "Gilbert Badaro and Hazem Hajj and Nizar Habash",
  title =        "A Link Prediction Approach for Accurately Mapping a
                 Large-scale {Arabic} Lexical Resource to {English}
                 {WordNet}",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "80:1--80:38",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3404854",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3404854",
  abstract =     "Success of Natural Language Processing (NLP) models,
                 just like all advanced machine learning models, rely
                 heavily on large-scale lexical resources. For English,
                 English WordNet (EWN) is a leading example of a
                 large-scale resource that has enabled \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "80",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ma:2020:IWS,
  author =       {Tinghuai Ma and Raeed Al-Sabri and Lejun Zhang and
                 Bockarie Marah and Najla Al-Nabhan},
  title =        {The Impact of Weighting Schemes and Stemming Process
                 on Topic Modeling of {Arabic} Long and Short Texts},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {81:1--81:23},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3405843},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3405843},
  abstract =     {In this article, first a comprehensive study of the
                 impact of term weighting schemes on the topic modeling
                 performance (i.e., LDA and DMM) on Arabic long and
                 short texts is presented. We investigate six term
                 weighting methods including Word count method
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {81},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{M:2020:CAH,
  author =       "Poornima Devi M. and M. Sornam",
  title =        "Classification of Ancient Handwritten {Tamil}
                 Characters on Palm Leaf Inscription Using Modified
                 Adaptive Backpropagation Neural Network with {GLCM}
                 Features",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "82:1--82:24",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406209",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406209",
  abstract =     "The core aspiration of this proposed work is to
                 classify Tamil characters inscribed in the palm leaf
                 manuscript using an Artificial Neural Network. Tamil
                 palm leaf manuscript characters in the form of images
                 were processed and segmented using contour-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yang:2020:AMU,
  author =       "Qimeng Yang and Long Yu and Shengwei Tian and Jinmiao
                 Song",
  title =        "Attention Mechanism for {Uyghur} Personal Pronouns
                 Resolution",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "83:1--83:13",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3412323",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3412323",
  abstract =     "Deep neural network models for Uyghur personal pronoun
                 resolution learn semantic information for personal
                 pronoun and antecedents, but tend to be
                 short-sighted---they ignore the importance of each
                 feature. In this article, we propose a Uyghur personal
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "83",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Xi:2020:GEL,
  author =       "Xuefeng Xi and Zhou Pi and Guodong Zhou",
  title =        "Global Encoding for Long {Chinese} Text
                 Summarization",
  journal =      j-TALLIP,
  volume =       "19",
  number =       "6",
  pages =        "84:1--84:17",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3407911",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sun Mar 28 08:15:55 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3407911",
  abstract =     "Text summarization is one of the significant tasks of
                 natural language processing, which automatically
                 converts text into a summary. Some summarization
                 systems, for short/long English, and short Chinese
                 text, benefit from advances in the neural
                 encoder-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "84",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Tanwar:2020:TMR,
  author =       {Ashwani Tanwar and Prasenjit Majumder},
  title =        {Translating Morphologically Rich {Indian} Languages
                 under Zero-Resource Conditions},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {85:1--85:15},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3407912},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3407912},
  abstract =     {This work presents an in-depth analysis of machine
                 translations of morphologically-rich Indo-Aryan and
                 Dravidian languages under zero-resource conditions. It
                 focuses on Zero-Shot Systems for these languages and
                 leverages transfer-learning by exploiting \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {85},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Laatar:2020:DAW,
  author =       {Rim Laatar and Chafik Aloulou and Lamia Hadrich
                 Belguith},
  title =        {Disambiguating {Arabic} Words According to Their
                 Historical Appearance in the Document Based on
                 Recurrent Neural Networks},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {86:1--86:16},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3410569},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3410569},
  abstract =     {How can we determine the semantic meaning of a word in
                 relation to its context of appearance? We eventually
                 have to grabble with this difficult question, as one of
                 the paramount problems of Natural Language Processing
                 (NLP). In other words, this issue \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {86},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Chou:2020:CWN,
  author =       {Chien-Lung Chou and Chia-Hui Chang and Yuan-Hao Lin
                 and Kuo-Chun Chien},
  title =        {On the Construction of {Web} {NER} Model Training Tool
                 based on Distant Supervision},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {87:1--87:28},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3422817},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3422817},
  abstract =     {Named entity recognition (NER) is an important task in
                 natural language understanding, as it extracts the key
                 entities (person, organization, location, date, number,
                 etc.) and objects (product, song, movie, activity name,
                 etc.) mentioned in texts. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {87},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wu:2020:DEW,
  author =       {Chuhan Wu and Fangzhao Wu and Tao Qi and Junxin Liu
                 and Yongfeng Huang and Xing Xie},
  title =        {Detecting Entities of Works for {Chinese} Chatbot},
  journal =      j-TALLIP,
  volume =       {19},
  number =       {6},
  pages =        {88:1--88:13},
  month =        nov,
  year =         {2020},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3414901},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Sun Mar 28 08:15:55 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3414901},
  abstract =     {Chatbots such as Xiaoice have gained huge popularity
                 in recent years. Users frequently mention their
                 favorite works such as songs and movies in
                 conversations with chatbots. Detecting these entities
                 can help design better chat strategies and improve user
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {88},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Manogaran:2021:SID,
  author =       {Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin
                 Xin},
  title =        {Special Issue on Deep Structured Learning for Natural
                 Language Processing},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {1:1--1:2},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3436206},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3436206},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {1},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2021:CDB,
  author =       {Kun Wang and Yanpeng Cui and Jianwei Hu and Yu Zhang
                 and Wei Zhao and Luming Feng},
  title =        {Cyberbullying Detection, Based on the {FastText} and
                 Word Similarity Schemes},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {2:1--2:15},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3398191},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3398191},
  abstract =     {With recent developments in online social networks
                 (OSNs), these services are widely applied in daily
                 lives. On the other hand, cyberbullying, which is a
                 relatively new type of harassment through the
                 internet-based electronic devices, is rising in
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {2},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Sun:2021:DIM,
  author =       {Chengai Sun and Liangyu Lv and Gang Tian and Tailu
                 Liu},
  title =        {Deep Interactive Memory Network for Aspect-Level
                 Sentiment Analysis},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {3:1--3:12},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3402886},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3402886},
  abstract =     {The goal of aspect-level sentiment analysis is to
                 identify the sentiment polarity of a specific opinion
                 target expressed; it is a fine-grained sentiment
                 analysis task. Most of the existing works study how to
                 better use the target information to model \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {3},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2021:VTM,
  author =       {Wei Wang and Zhiguo Gong and Jing Ren and Feng Xia and
                 Zhihan Lv and Wei Wei},
  title =        {Venue Topic Model-enhanced Joint Graph Modelling for
                 Citation Recommendation in Scholarly Big Data},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {4:1--4:15},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3404995},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3404995},
  abstract =     {Natural language processing technologies, such as
                 topic models, have been proven to be effective for
                 scholarly recommendation tasks with the ability to deal
                 with content information. Recently, venue
                 recommendation is becoming an increasingly important
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {4},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Peng:2021:THS,
  author =       {Lingxi Peng and Haohuai Liu and Yangang Nie and Ying
                 Xie and Xuan Tang and Ping Luo},
  title =        {The Transnational Happiness Study with Big Data
                 Technology},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {5:1--5:12},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3412497},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3412497},
  abstract =     {Happiness is a hot topic in academic circles. The
                 study of happiness involves many disciplines, such as
                 philosophy, psychology, sociology, and economics.
                 However, there are few studies on the quantitative
                 analysis of the factors affecting happiness. In
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {5},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Guangce:2021:KDN,
  author =       "Ruan, Guangce and Xia, Lei",
  title =        "Knowledge Discovery of News Text Based on Artificial
                 Intelligence",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "1",
  pages =        "6:1--6:18",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3418062",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Apr 15 14:24:01 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3418062",
  abstract =     "The explosion of news text and the development of
                 artificial intelligence provide a new opportunity and
                 challenge to provide high-quality media monitoring
                 service. In this article, we propose a semantic
                 analysis approach based on the Latent Dirichlet
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Meelen:2021:OLA,
  author =       {Marieke Meelen and {\'E}lie Roux and Nathan Hill},
  title =        {Optimisation of the Largest Annotated {Tibetan} Corpus
                 Combining Rule-based, Memory-based, and Deep-learning
                 Methods},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {7:1--7:11},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3409488},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3409488},
  abstract =     {This article presents a pipeline that converts
                 collections of Tibetan documents in plain text or XML
                 into a fully segmented and POS-tagged corpus. We apply
                 the pipeline to the large extent collection of the
                 Buddhist Digital Resource Center. The semisupervised
                 methods presented here not only result in a new and
                 improved version of the largest annotated Tibetan
                 corpus to date, the integration of rule-based,
                 memory-based, and neural-network methods also serves as
                 a good example of how to overcome challenges of
                 under-researched languages. The end-to-end accuracy of
                 our entire automatic pipeline of 91.99\% is high enough
                 to make the resulting corpus a useful resource for both
                 linguists and scholars of Tibetan studies.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {7},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Kumar:2021:MNF,
  author =       {S. Rakesh Kumar and S. Muthuramalingam and Fadi
                 Al-Turjman},
  title =        {Multimodal News Feed Evaluation System with Deep
                 Reinforcement Learning Approaches},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {1},
  pages =        {8:1--8:12},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3414523},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu Apr 15 14:24:01 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3414523},
  abstract =     {Multilingual and multimodal data analysis is the
                 emerging news feed evaluation system. News feed
                 analysis and evaluations are interrelated processes,
                 which are useful in understanding the news factors. The
                 news feed evaluation system can be implemented
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {8},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Khan:2021:HSD,
  author =       "Muhammad Moin Khan and Khurram Shahzad and Muhammad
                 Kamran Malik",
  title =        "Hate Speech Detection in {Roman Urdu}",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "1",
  pages =        "9:1--9:19",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3414524",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Apr 15 14:24:01 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3414524",
  abstract =     "Hate speech is a specific type of controversial
                 content that is widely legislated as a crime that must
                 be identified and blocked. However, due to the sheer
                 volume and velocity of the Twitter data stream, hate
                 speech detection cannot be performed manually. To
                 address this issue, several studies have been conducted
                 for hate speech detection in European languages,
                 whereas little attention has been paid to low-resource
                 South Asian languages, making the social media
                 vulnerable for millions of users. In particular, to the
                 best of our knowledge, no study has been conducted for
                 hate speech detection in Roman Urdu text, which is
                 widely used in the sub-continent. In this study, we
                 have scraped more than 90,000 tweets and manually
                 parsed them to identify 5,000 Roman Urdu tweets.
                 Subsequently, we have employed an iterative approach to
                 develop guidelines and used them for generating the
                 Hate Speech Roman Urdu 2020 corpus. The tweets in
                 this corpus are classified at three levels:
                 Neutral--Hostile, Simple--Complex, and Offensive--Hate
                 speech. As another contribution, we have used five
                 supervised learning techniques, including a deep
                 learning technique, to evaluate and compare their
                 effectiveness for hate speech detection. The results
                 show that Logistic Regression outperformed all other
                 techniques, including deep learning techniques for the
                 two levels of classification, by achieving an F1 score
                 of 0.906 for distinguishing between Neutral--Hostile
                 tweets, and 0.756 for distinguishing between
                 Offensive--Hate speech tweets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sun:2021:UNM,
  author          = {Haipeng Sun and Rui Wang and Masao Utiyama and
                     Benjamin Marie and Kehai Chen and Eiichiro Sumita and
                     Tiejun Zhao},
  title           = {Unsupervised Neural Machine Translation for Similar
                     and Distant Language Pairs: an Empirical Study},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {10:1--10:17},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3418059},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3418059},
  abstract        = {Unsupervised neural machine translation (UNMT) has
                     achieved remarkable results for several language pairs,
                     such as French-English and German-English. Most
                     previous studies have focused on modeling UNMT systems;
                     few studies have investigated the effect \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Zhang:2021:HBN,
  author          = {Peiying Zhang and Xingzhe Huang and Maozhen Li and Yu
                     Xue},
  title           = {Hybridization between Neural Computing and
                     Nature-Inspired Algorithms for a Sentence Similarity
                     Model Based on the Attention Mechanism},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {11:1--11:21},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3447756},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3447756},
  abstract        = {Sentence similarity analysis has been applied in many
                     fields, such as machine translation, the question
                     answering system, and voice customer service. As a
                     basic task of natural language processing, sentence
                     similarity analysis plays an important role in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Husain:2021:SOL,
  author          = {Fatemah Husain and Ozlem Uzuner},
  title           = {A Survey of Offensive Language Detection for the
                     {Arabic} Language},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {12:1--12:44},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3421504},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3421504},
  abstract        = {The use of offensive language in user-generated
                     content is a serious problem that needs to be addressed
                     with the latest technology. The field of Natural
                     Language Processing (NLP) can support the automatic
                     detection of offensive language. In this survey,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Alzubaidi:2021:RTA,
  author          = {Mohammad A. Alzubaidi and Mwaffaq Otoom and Nouran S.
                     Ahmad},
  title           = {Real-time Assistive Reader Pen for {Arabic} Language},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {13:1--13:30},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423133},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423133},
  abstract        = {Disability is an impairment affecting an individual's
                     livelihood and independence. Assistive technology
                     enables the disabled cohort of the community to break
                     the barriers to learning, access information,
                     contribute to the community, and live \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Sidig:2021:KAS,
  author =       "Ala Addin I. Sidig and Hamzah Luqman and Sabri Mahmoud
                 and Mohamed Mohandes",
  title =        "{KArSL}: {Arabic} Sign Language Database",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "1",
  pages =        "14:1--14:19",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3423420",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Apr 15 14:24:01 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3423420",
  abstract =     "Sign language is the major means of communication for
                 the deaf community. It uses body language and gestures
                 such as hand shapes, lip patterns, and facial
                 expressions to convey a message. Sign language is
                 geography-specific, as it differs from one \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wijayanti:2021:AIS,
  author          = {Rini Wijayanti and Andria Arisal},
  title           = {Automatic {Indonesian} Sentiment Lexicon Curation with
                     Sentiment Valence Tuning for Social Media Sentiment
                     Analysis},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {15:1--15:16},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3425632},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3425632},
  abstract        = {A novel Indonesian sentiment lexicon (SentIL ---
                     Sentiment Indonesian Lexicon) is created with an
                     automatic pipeline; from creating sentiment seed words,
                     adding new words with slang words, emoticons, and from
                     the given dictionary and sentiment corpus, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Li:2021:TTS,
  author          = {Zhongyang Li and Xiao Ding and Ting Liu},
  title           = {{TransBERT}: a Three-Stage Pre-training Technology for
                     Story-Ending Prediction},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {16:1--16:20},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3427669},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3427669},
  abstract        = {Recent advances, such as GPT, BERT, and RoBERTa, have
                     shown success in incorporating a pre-trained
                     transformer language model and fine-tuning operations
                     to improve downstream NLP systems. However, this
                     framework still has some fundamental problems in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Bolucu:2021:CUM,
  author          = {Necva B{\"o}l{\"u}c{\"u} and Burcu Can},
  title           = {A Cascaded Unsupervised Model for {PoS} Tagging},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {17:1--17:23},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3447759},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3447759},
  abstract        = {Part of speech (PoS) tagging is one of the fundamental
                     syntactic tasks in Natural Language Processing, as it
                     assigns a syntactic category to each word within a
                     given sentence or context (such as noun, verb,
                     adjective, etc.). Those syntactic categories \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Chauhan:2021:ISC,
  author          = {Uttam Chauhan and Apurva Shah},
  title           = {Improving Semantic Coherence of {Gujarati} Text Topic
                     Model Using Inflectional Forms Reduction and
                     Single-letter Words Removal},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {1},
  pages           = {18:1--18:18},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3447760},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu Apr 15 14:24:01 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3447760},
  abstract        = {A topic model is one of the best stochastic models for
                     summarizing an extensive collection of text. It has
                     accomplished an inordinate achievement in text analysis
                     as well as text summarization. It can be employed to
                     the set of documents that are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

%%% NB: The first author's family name is Long (compare the author lists
%%% of entries Long:2021:RTM, Shi:2021:MLC, and An:2021:NDP).  The citation
%%% label below is retained unchanged so that existing citations still
%%% resolve.
@Article{Congjun:2021:RDT,
  author =       "Congjun Long and Nathan W. Hill",
  title =        "Recent Developments in {Tibetan NLP}",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "2",
  pages =        "19:1--19:3",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3453692",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu May 6 07:32:43 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453692",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{List:2021:TSH,
  author =       "Johann-Mattis List and Nathaniel A. Sims and Robert
                 Forkel",
  title =        "Toward a Sustainable Handling of Interlinear-Glossed
                 Text in Language Documentation",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "2",
  pages =        "20:1--20:15",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3389010",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu May 6 07:32:43 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3389010",
  abstract =     "While the amount of digitally available data on the
                 world's languages is steadily increasing, with more and
                 more languages being documented, only a small
                 proportion of the language resources produced are
                 sustainable. Data reuse is often difficult due to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Krishna:2021:ATA,
  author          = {Ravi Krishna and Norman Mu and Kurt Keutzer},
  title           = {Applying Text Analytics to the Mind-section Literature
                     of the {Tibetan} Tradition of the {Great Perfection}},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {21:1--21:32},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3392047},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3392047},
  abstract        = {Over the past decade, through a mixture of optical
                     character recognition and manual input, there is now a
                     growing corpus of Tibetan literature available as
                     e-texts in Unicode format. With the creation of such a
                     corpus, the techniques of text analytics \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Trinley:2021:TWE,
  author          = {Ngawang Trinley and Tenzin and Dirk Schmidt and Helios
                     Hildt and Tenzin Kaldan},
  title           = {Taming the Wild Etext: Managing, Annotating, and
                     Sharing {Tibetan} Corpora in Open Spaces},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {22:1--22:23},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3418060},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3418060},
  abstract        = {Digital text is quickly becoming essential to modern
                     daily life. The article you are reading right now is
                     born digital; unlike texts of the not-so-distant past,
                     it may never be printed at all. Worldwide, the trend is
                     clear: Digital text is on the way in, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Kulkarni:2021:SPF,
  author =       "Amba Kulkarni",
  title =        "{Sanskrit} Parsing Following {Indian} Theories of
                 Verbal Cognition",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "2",
  pages =        "23:1--23:38",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3418061",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu May 6 07:32:43 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3418061",
  abstract =     "P{\=a}{\d{n}}ini's grammar is an important milestone
                 in the Indian grammatical tradition. Unlike grammars of
                 other languages, it is almost exhaustive and together
                 with the theories of {\'s}{\=a}bdabodha (verbal
                 cognition), this grammar provides a system for language
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2021:FBS,
  author          = {Yachao Li and Jing Jiang and Jia Yangji and Ning Ma},
  title           = {Finding Better Subwords for {Tibetan} Neural Machine
                     Translation},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {24:1--24:11},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3448216},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3448216},
  abstract        = {Subword segmentation plays an important role in
                     Tibetan neural machine translation (NMT). The structure
                     of Tibetan words consists of two levels. First, words
                     consist of a sequence of syllables, and then a syllable
                     consists of a sequence of characters. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {24},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Long:2021:RTM,
  author          = {Congjun Long and Xuewen Zhou and Maoke Zhou},
  title           = {Recognition of {Tibetan} Maximal-length Noun Phrases
                     Based on Syntax Tree},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {25:1--25:13},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423324},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423324},
  abstract        = {Frequently corresponding to syntactic components, the
                     Maximal-length Noun Phrase (MNP) possesses abundant
                     syntactic and semantic information and acts a certain
                     semantic role in sentences. Recognition of MNP plays an
                     important role in Natural Language \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Shi:2021:MLC,
  author          = {Shumin Shi and Dan Luo and Xing Wu and Congjun Long
                     and Heyan Huang},
  title           = {Multi-level Chunk-based Constituent-to-Dependency
                     {Treebank} Transformation for {Tibetan} Dependency
                     Parsing},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {26:1--26:12},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3424247},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3424247},
  abstract        = {Dependency parsing is an important task for Natural
                     Language Processing (NLP). However, a mature parser
                     requires a large treebank for training, which is still
                     extremely costly to create. Tibetan is a kind of
                     extremely low-resource language for NLP, there
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Sun:2021:JMR,
  author          = {Yuan Sun and Andong Chen and Chaofan Chen and Tianci
                     Xia and Xiaobing Zhao},
  title           = {A Joint Model for Representation Learning of {Tibetan}
                     Knowledge Graph Based on Encyclopedia},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {27:1--27:17},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3447248},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3447248},
  abstract        = {Learning the representation of a knowledge graph is
                     critical to the field of natural language processing.
                     There is a lot of research for English knowledge graph
                     representation. However, for the low-resource
                     languages, such as Tibetan, how to represent \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2021:CSE,
  author          = {Hao Wang and Bin Wang and Jianyong Duan and Jiajun
                     Zhang},
  title           = {{Chinese} Spelling Error Detection Using a Fusion
                     Lattice {LSTM}},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {28:1--28:11},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3426882},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3426882},
  abstract        = {Spelling error detection serves as a crucial
                     preprocessing in many natural language processing
                     applications. Unlike English, where every single word
                     is directly typed by keyboard, we have to use an input
                     method to input Chinese characters. The pinyin
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Nasution:2021:POB,
  author          = {Arbi Haza Nasution and Yohei Murakami and Toru
                     Ishida},
  title           = {Plan Optimization to Bilingual Dictionary Induction
                     for Low-resource Language Families},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {29:1--29:28},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3448215},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3448215},
  abstract        = {Creating bilingual dictionary is the first crucial
                     step in enriching low-resource languages. Especially
                     for the closely related ones, it has been shown that
                     the constraint-based approach is useful for inducing
                     bilingual lexicons from two bilingual \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{An:2021:NDP,
  author          = {Bo An and Congjun Long},
  title           = {Neural Dependency Parser for {Tibetan} Sentences},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {30:1--30:16},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3429456},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3429456},
  abstract        = {The research of Tibetan dependency analysis is mainly
                     limited to two challenges: lack of a dataset and
                     reliance on expert knowledge. To resolve the preceding
                     challenges, we first introduce a new Tibetan dependency
                     analysis dataset, and then propose a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Zhang:2021:USC,
  author          = {Longtu Zhang and Mamoru Komachi},
  title           = {Using Sub-character Level Information for Neural
                     Machine Translation of Logographic Languages},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {2},
  pages           = {31:1--31:15},
  month           = apr,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3431727},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  bibdate         = {Thu May 6 07:32:43 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3431727},
  abstract        = {Logographic and alphabetic languages (e.g., Chinese
                     vs. English) have different writing systems
                     linguistically. Languages belonging to the same writing
                     system usually exhibit more sharing information, which
                     can be used to facilitate natural language \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-URL     = {https://dl.acm.org/loi/tallip},
}

@Article{Mishra:2021:HIC,
  author =       {Santosh Kumar Mishra and Rijul Dhir and Sriparna Saha
                 and Pushpak Bhattacharyya},
  title =        {A {Hindi} Image Caption Generation Framework Using
                 Deep Learning},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {2},
  pages =        {32:1--32:19},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3432246},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu May 6 07:32:43 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3432246},
  abstract =     {Image captioning is the process of generating a
                 textual description of an image that aims to describe
                 the salient parts of the given image. It is an
                 important problem, as it involves computer vision and
                 natural language processing, where computer vision
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {32},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Darwish:2021:ADR,
  author =       {Kareem Darwish and Ahmed Abdelali and Hamdy Mubarak
                 and Mohamed Eldesouki},
  title =        {{Arabic} Diacritic Recovery Using a Feature-rich
                 {biLSTM} Model},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {2},
  pages =        {33:1--33:18},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3434235},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu May 6 07:32:43 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434235},
  abstract =     {Diacritics (short vowels) are typically omitted when
                 writing Arabic text, and readers have to reintroduce
                 them to correctly pronounce words. There are two types
                 of Arabic diacritics: The first are core-word
                 diacritics (CW), which specify the lexical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {33},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Binbeshr:2021:SRH,
  author =       {Farid Binbeshr and Amirrudin Kamsin and Manal
                 Mohammed},
  title =        {A Systematic Review on Hadith Authentication and
                 Classification Methods},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {2},
  pages =        {34:1--34:17},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3434236},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu May 6 07:32:43 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434236},
  abstract =     {Background: A hadith refers to sayings, actions, and
                 characteristics of the Prophet Muhammad peace be upon
                 him. The authenticity of hadiths is crucial, because
                 they constitute the source of legislation for Muslims
                 with the Holy Quran. Classifying hadiths \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {34},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2021:HMN,
  author =       {Yu Wang and Yining Sun and Zuchang Ma and Lisheng Gao
                 and Yang Xu},
  title =        {A Hybrid Model for Named Entity Recognition on
                 {Chinese} Electronic Medical Records},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {2},
  pages =        {35:1--35:12},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3436819},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu May 6 07:32:43 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3436819},
  abstract =     {Electronic medical records (EMRs) contain valuable
                 information about the patients, such as clinical
                 symptoms, diagnostic results, and medications. Named
                 entity recognition (NER) aims to recognize entities
                 from unstructured text, which is the initial step
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {35},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Jin:2021:HSS,
  author =       {Guozhe Jin and Zhezhou Yu},
  title =        {A Hierarchical Sequence-to-Sequence Model for {Korean}
                 {POS} Tagging},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {2},
  pages =        {36:1--36:13},
  month =        apr,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3421762},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Thu May 6 07:32:43 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3421762},
  abstract =     {Part-of-speech (POS) tagging is a fundamental task in
                 natural language processing. Korean POS tagging
                 consists of two subtasks: morphological analysis and
                 POS tagging. In recent years, scholars have tended to
                 use the seq2seq model to solve this problem. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {36},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{S:2021:SAA,
  author =       {Dhivya S. and Usha Devi G.},
  title =        {Study on Automated Approach to Recognize Characters
                 for Handwritten and Historical Document},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {37:1--37:24},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3396167},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3396167},
  abstract =     {Script recognition is the mechanism of automatic
                 script analysis and recognition whereby intensive study
                 has been carried out and a significant amount of papers
                 on this problem have been released over the past. But
                 there are still a few issues to be \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {37},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Yang:2021:DDH,
  author =       {Xiaodong Yang and Xiaoxia Lin},
  title =        {Design and Development of Heuristic Utility Management
                 Algorithm for {Chinese} Library Management System},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {38:1--38:13},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3397968},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3397968},
  abstract =     {Utility Management in a library is the programmatic
                 tool with the synthetic mental program ability, along
                 with Artificial Intelligence capacities, headed to
                 manage a high volume of books, articles, and
                 assignments, which help to ease the manual \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {38},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{S:2021:THT,
  author =       "Dhivya S. and Usha Devi G.",
  title =        "{TAMIZHI}: Historical {Tamil-Brahmi} Script
                 Recognition Using {CNN} and {MobileNet}",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "3",
  pages =        "39:1--39:26",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3402891",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 14 07:03:09 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3402891",
  abstract =     "Computational epigraphy is the study of an ancient
                 script where the computer science and mathematical
                 model is relatively built for epigraphy. The
                 Tamil-Brahmi inscriptions are the most ancient of the
                 extant written of the Tamil. The inscriptions furnish
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jiang:2021:TLB,
  author =       {Peipei Jiang and Liailun Chen and Min-Feng Wang},
  title =        {Transfer Learning Based Recurrent Neural Network
                 Algorithm for Linguistic Analysis},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {40:1--40:16},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3406204},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3406204},
  abstract =     {Each language is a system of understanding and skills
                 that allows language users to interact, express
                 thoughts, hypotheses, feelings, wishes, and all that
                 needs to be expressed. Linguistics is the research of
                 these structures in all respects: the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {40},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Fang:2021:HBG,
  author =       {Hui Fang and Hongmei Shi and Jiuzhou Zhang},
  title =        {Heuristic Bilingual Graph Corpus Network to Improve
                 {English} Instruction Methodology Based on Statistical
                 Translation Approach},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {41:1--41:14},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3406205},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3406205},
  abstract =     {The number of sentence pairs in the bilingual corpus
                 is a key to translation accuracy in computational
                 machine translations. However, if the amount goes
                 beyond a certain degree, the increasing number of cases
                 has less impact on the translation while the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {41},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Jamal:2021:DLB,
  author =       "Nasir Jamal and Xianqiao Chen and Fadi Al-Turjman and
                 Farhan Ullah",
  title =        "A Deep Learning-based Approach for Emotions
                 Classification in Big Corpus of Imbalanced Tweets",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "3",
  pages =        "42:1--42:16",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3410570",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 14 07:03:09 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410570",
  abstract =     "Emotions detection in natural languages is very
                 effective in analyzing the user's mood about a
                 concerned product, news, topic, and so on. However, it
                 is really a challenging task to extract important
                 features from a burst of raw social text, as emotions
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Muthu:2021:FET,
  author =       "Balaanand Muthu and C. B. Sivaparthipan and Priyan
                 Malarvizhi Kumar and Seifedine Nimer Kadry and
                 Ching-Hsien Hsu and Oscar Sanjuan and Gonzalez Crespo,
                 Ruben",
  title =        "A Framework for Extractive Text Summarization Based on
                 Deep Learning Modified Neural Network Classifier",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "3",
  pages =        "45:1--45:20",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3392048",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 14 07:03:09 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3392048",
  abstract =     "There is an exponential growth of text data over the
                 internet, and it is expected to gain significant growth
                 and attention in the coming years. Extracting
                 meaningful insights from text data is crucially
                 important as it offers value-added solutions to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2021:AEA,
  author =       {Ailing Wang and Jie Sun and Leiming Li},
  title =        {An Analysis for Elements of Affecting the
                 Establishment and Promotion of Micro-business Trust in
                 {C2C} Model under {WeChat} Circumstance},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {46:1--46:11},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3398011},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3398011},
  abstract =     {The core of micro-business and consumer transactions
                 is trust. Based on the Theory of Reasoned Action and
                 Technology Acceptance Model, this article discusses the
                 factors of the establishment and promotion of
                 micro-business trust from the trust orientation
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {46},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2021:SGD,
  author =       {Erlu Wang and Priyan Malarvizhi Kumar and R. Dinesh
                 Jackson Samuel},
  title =        {Semantic Graphical Dependence Parsing Model in
                 Improving {English} Teaching Abilities},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {48:1--48:14},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3425633},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3425633},
  abstract =     {It is a very difficult problem to achieve high-order
                 functionality for graphical dependency parsing without
                 growing decoding difficulties. To solve this problem,
                 this article offers a way for Semantic Graphical
                 Dependence Parsing Model (SGDPM) with a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {48},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{P:2021:TST,
  author =       {Ashokkumar P. and Siva Shankar G. and Gautam
                 Srivastava and Praveen Kumar Reddy Maddikunta and
                 Thippa Reddy Gadekallu},
  title =        {A Two-stage Text Feature Selection Algorithm for
                 Improving Text Classification},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {49:1--49:19},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3425781},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3425781},
  abstract =     {As the number of digital text documents increases on a
                 daily basis, the classification of text is becoming a
                 challenging task. Each text document consists of a
                 large number of words (or features) that drive down the
                 efficiency of a classification \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {49},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Quamer:2021:SSA,
  author =       {Waris Quamer and Praphula Kumar Jain and Arpit Rai and
                 Vijayalakshmi Saravanan and Rajendra Pamula and
                 Chiranjeev Kumar},
  title =        {{SACNN}: Self-attentive Convolutional Neural Network
                 Model for Natural Language Inference},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {50:1--50:16},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3426884},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3426884},
  abstract =     {Inference has been central problem for understanding
                 and reasoning in artificial intelligence. Especially,
                 Natural Language Inference is an interesting problem
                 that has attracted the attention of many researchers.
                 Natural language inference intends to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {50},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Liao:2021:ENO,
  author =       {Hsiu-Li Liao and Zhen-Yu Huang and Su-Houn Liu},
  title =        {The Effects of Negative Online Reviews on Consumer
                 Perception, Attitude and Purchase Intention:
                 Experimental Investigation of the Amount, Quality, and
                 Presentation Order of {eWOM}},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {51:1--51:21},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3426883},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3426883},
  abstract =     {The quick growth and fast spread of electronic
                 word-of-mouth (eWOM) have created a new threat to
                 Internet merchants and marketers through paid online
                 reviewers flooding sites with product and service
                 reviews that could confuse and deter customers. This
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {51},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Seifollahi:2021:EBT,
  author =       {Sattar Seifollahi and Massimo Piccardi and Alireza
                 Jolfaei},
  title =        {An Embedding-Based Topic Model for Document
                 Classification},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {52:1--52:13},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3431728},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3431728},
  abstract =     {Topic modeling is an unsupervised learning task that
                 discovers the hidden topics in a collection of
                 documents. In turn, the discovered topics can be used
                 for summarizing, organizing, and understanding the
                 documents in the collection. Most of the existing
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {52},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Li:2021:DSL,
  author =       {Yong Li and Xiaojun Yang and Min Zuo and Qingyu Jin
                 and Haisheng Li and Qian Cao},
  title =        {Deep Structured Learning for Natural Language
                 Processing},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {3},
  pages =        {53:1--53:14},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3433538},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:09 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3433538},
  abstract =     {The real-time and dissemination characteristics of
                 network information make net-mediated public opinion
                 become more and more important food safety early
                 warning resources, but the data of petabyte (PB) scale
                 growth also bring great difficulties to the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {53},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Mao:2021:NJM,
  author =       {Cunli Mao and Zhibo Man and Zhengtao Yu and Shengxiang
                 Gao and Zhenhan Wang and Hongbin Wang},
  title =        {A Neural Joint Model with {BERT} for {Burmese}
                 Syllable Segmentation, Word Segmentation, and {POS}
                 Tagging},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {54:1--54:23},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3436818},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3436818},
  abstract =     {The smallest semantic unit of the Burmese language is
                 called the syllable. In the present study, it is
                 intended to propose the first neural joint learning
                 model for Burmese syllable segmentation, word
                 segmentation, and part-of-speech (POS) tagging with
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {54},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{E:2021:AMP,
  author =       "K. E. Manjunath and K. M. Srinivasa Raghavan and K.
                 Sreenivasa Rao and Dinesh Babu Jayagopi and V.
                 Ramasubramanian",
  title =        "Approaches for Multilingual Phone Recognition in
                 Code-switched and Non-code-switched Scenarios Using
                 {Indian} Languages",
  journal =      j-TALLIP,
  volume =       "20",
  number =       "4",
  pages =        "55:1--55:19",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3437256",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 14 07:03:10 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3437256",
  abstract =     "In this study, we evaluate and compare two different
                 approaches for multilingual phone recognition in
                 code-switched and non-code-switched scenarios. First
                 approach is a front-end Language Identification
                 (LID)-switched to a monolingual phone recognizer
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2021:NAM,
  author =       {Mohinder Kumar and Manish Kumar Jindal and Munish
                 Kumar},
  title =        {A Novel Attack on Monochrome and Greyscale
                 {Devanagari} {CAPTCHAs}},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {56:1--56:30},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3439798},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3439798},
  abstract =     {The use of computer programs in breaching web site
                 security is common today. CAPTCHA (Completely Automated
                 Public Turing test to tell Computers and Humans Apart)
                 and human interaction proofs are the cost-effective
                 solution to these kinds of computer \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {56},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Lin:2021:FIG,
  author =       {Nankai Lin and Boyu Chen and Xiaotian Lin and Kanoksak
                 Wattanachote and Shengyi Jiang},
  title =        {A Framework for {Indonesian} Grammar Error
                 Correction},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {57:1--57:12},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3440993},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3440993},
  abstract =     {Grammatical Error Correction (GEC) is a challenge in
                 Natural Language Processing research. Although many
                 researchers have been focusing on GEC in universal
                 languages such as English or Chinese, few studies focus
                 on Indonesian, which is a low-resource \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {57},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Shivachi:2021:LSU,
  author =       {Casper Shikali Shivachi and Refuoe Mokhosi and Zhou
                 Shijie and Liu Qihe},
  title =        {Learning Syllables Using {Conv-LSTM} Model for
                 {Swahili} Word Representation and Part-of-speech
                 Tagging},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {58:1--58:25},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3445975},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3445975},
  abstract =     {The need to capture intra-word information in natural
                 language processing (NLP) tasks has inspired research
                 in learning various word representations at word,
                 character, or morpheme levels, but little attention has
                 been given to syllables from a syllabic \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {58},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Ranathunga:2021:SAS,
  author =       {Surangika Ranathunga and Isuru Udara Liyanage},
  title =        {Sentiment Analysis of {Sinhala} News Comments},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {59:1--59:23},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3445035},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3445035},
  abstract =     {Sinhala is a low-resource language, for which basic
                 language and linguistic tools have not been properly
                 defined. This affects the development of NLP-based
                 end-user applications for Sinhala. Thus, when
                 implementing NLP tools such as sentiment analyzers,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {59},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Zhu:2021:GBM,
  author =       {Junnan Zhu and Lu Xiang and Yu Zhou and Jiajun Zhang
                 and Chengqing Zong},
  title =        {Graph-based Multimodal Ranking Models for Multimodal
                 Summarization},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {60:1--60:21},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3445794},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3445794},
  abstract =     {Multimodal summarization aims to extract the most
                 important information from the multimedia input. It is
                 becoming increasingly popular due to the rapid growth
                 of multimedia data in recent years. There are various
                 researches focusing on different \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {60},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Lalrempuii:2021:IEM,
  author =       {Candy Lalrempuii and Badal Soni and Partha Pakray},
  title =        {An Improved {English-to-Mizo} Neural Machine
                 Translation},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {61:1--61:21},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3445974},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3445974},
  abstract =     {Machine Translation is an effort to bridge language
                 barriers and misinterpretations, making communication
                 more convenient through the automatic translation of
                 languages. The quality of translations produced by
                 corpus-based approaches predominantly depends
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {61},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Thin:2021:TNL,
  author =       {Dang Van Thin and Ngan Luu-Thuy Nguyen and Tri Minh
                 Truong and Lac Si Le and Duy Tin Vo},
  title =        {Two New Large Corpora for {Vietnamese} Aspect-based
                 Sentiment Analysis at Sentence Level},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {62:1--62:22},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3446678},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3446678},
  abstract =     {Aspect-based sentiment analysis has been studied in
                 both research and industrial communities over recent
                 years. For the low-resource languages, the standard
                 benchmark corpora play an important role in the
                 development of methods. In this article, we \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {62},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Alian:2021:BAP,
  author =       {Marwah Alian and Arafat Awajan and Ahmad Al-Hasan and
                 Raeda Akuzhia},
  title =        {Building {Arabic} Paraphrasing Benchmark based on
                 Transformation Rules},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {63:1--63:17},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3446770},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3446770},
  abstract =     {Measuring semantic similarity between short texts is
                 an important task in many applications of natural
                 language processing, such as paraphrasing
                 identification. This process requires a benchmark of
                 sentence pairs that are labeled by Arab linguists and
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {63},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Prabhakar:2021:QET,
  author =       {Dinesh Kumar Prabhakar and Sukomal Pal and Chiranjeev
                 Kumar},
  title =        {Query Expansion for Transliterated Text Retrieval},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {64:1--64:34},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3447649},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3447649},
  abstract =     {With Web 2.0, there has been exponential growth in the
                 number of Web users and the volume of Web content. Most
                 of these users are not only consumers of the
                 information but also generators of it. People express
                 themselves here in colloquial languages, but \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {64},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Taghizadeh:2021:CLA,
  author =       {Nasrin Taghizadeh and Heshaam Faili},
  title =        {Cross-lingual Adaptation Using Universal
                 Dependencies},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {65:1--65:23},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3448251},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3448251},
  abstract =     {We describe a cross-lingual adaptation method based on
                 syntactic parse trees obtained from the Universal
                 Dependencies (UD), which are consistent across
                 languages, to develop classifiers in low-resource
                 languages. The idea of UD parsing is to capture
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {65},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Coban:2021:FTM,
  author =       {{\"O}nder {\c{C}}oban and Ali Inan and Selma Ayse
                 {\"O}zel},
  title =        {{Facebook} Tells Me Your Gender: an Exploratory Study
                 of Gender Prediction for {Turkish} {Facebook} Users},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {66:1--66:38},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3448253},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3448253},
  abstract =     {Online Social Networks (OSNs) are very popular
                 platforms for social interaction. Data posted publicly
                 over OSNs pose various threats against the individual
                 privacy of OSN users. Adversaries can try to predict
                 private attribute values, such as gender, as \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {66},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Qi:2021:DPB,
  author =       {Shanshan Qi and Limin Zheng and Feiyu Shang},
  title =        {Dependency Parsing-based Entity Relation Extraction
                 over {Chinese} Complex Text},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {67:1--67:34},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3450273},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3450273},
  abstract =     {Open Relation Extraction (ORE) plays a significant
                 role in the field of Information Extraction. It breaks
                 the limitation that traditional relation extraction
                 must pre-define relational types in the annotated
                 corpus and specific domains restrictions, to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {67},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Mousavi:2021:DPW,
  author =       {Zahra Mousavi and Heshaam Faili},
  title =        {Developing the {Persian} {Wordnet} of Verbs Using
                 Supervised Learning},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {68:1--68:18},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3450969},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3450969},
  abstract =     {Nowadays, wordnets are extensively used as a major
                 resource in natural language processing and information
                 retrieval tasks. Therefore, the accuracy of wordnets
                 has a direct influence on the performance of the
                 involved applications. This paper presents a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {68},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Arora:2021:SSR,
  author =       {Karunesh Kumar Arora and Shyam Sunder Agrawal},
  title =        {Source-side Reordering to Improve Machine Translation
                 between Languages with Distinct Word Orders},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {69:1--69:18},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3448252},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3448252},
  abstract =     {English and Hindi have significantly different word
                 orders. English follows the subject-verb-object (SVO)
                 order, while Hindi primarily follows the
                 subject-object-verb (SOV) order. This difference poses
                 challenges to modeling this pair of languages for
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {69},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Kumari:2021:RNS,
  author =       {Divya Kumari and Asif Ekbal and Rejwanul Haque and
                 Pushpak Bhattacharyya and Andy Way},
  title =        {Reinforced {NMT} for Sentiment and Content
                 Preservation in Low-resource Scenario},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {70:1--70:27},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3450970},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3450970},
  abstract =     {The preservation of domain knowledge from source to
                 the target is crucial in any translation workflows.
                 Hence, translation service providers that use machine
                 translation (MT) in production could reasonably expect
                 that the translation process should \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {70},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Sathish:2021:ISA,
  author =       {R. Sathish and P. Ezhumalai},
  title =        {Intermodal Sentiment Analysis for Images with Text
                 Captions Using the {VGGNET} Technique},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {4},
  pages =        {71:1--71:14},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3450971},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Sep 14 07:03:10 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3450971},
  abstract =     {More individuals actively express their opinions and
                 attitudes in social media through advanced improvements
                 such as visual content and text captions. Sentiment
                 analysis for visuals such as images, video, and GIFs
                 has become an emerging research trend in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {71},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Manogaran:2021:ISI,
  author =       {Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin
                 Xin},
  title =        {Introduction to the Special Issue on Deep Structured
                 Learning for Natural Language Processing, {Part 3}},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {72e:1--72e:3},
  month =        sep,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3476464},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3476464},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {72e},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Tan:2021:RPT,
  author =       {Junyang Tan and Dan Xia and Shiyun Dong and Honghao
                 Zhu and Binshi Xu},
  title =        {Research On Pre-Training Method and Generalization
                 Ability of Big Data Recognition Model of the {Internet
                 of Things}},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {72:1--72:15},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3433539},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3433539},
  abstract =     {The Internet of Things and big data are currently hot
                 concepts and research fields. The mining,
                 classification, and recognition of big data in the
                 Internet of Things system are the key links that are
                 widely of concern at present. The artificial neural
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {72},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Li:2021:SAQ,
  author =       {Yarong Li},
  title =        {Sequence Alignment with {Q}-Learning Based on the
                 Actor--Critic Model},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {73:1--73:7},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3433540},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3433540},
  abstract =     {Multiple sequence alignment methods refer to a series
                 of algorithmic solutions for the alignment of
                 evolutionary-related sequences while taking into
                 account evolutionary events such as mutations,
                 insertions, deletions, and rearrangements under certain
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {73},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Naseem:2021:CSW,
  author =       {Usman Naseem and Imran Razzak and Shah Khalid Khan and
                 Mukesh Prasad},
  title =        {A Comprehensive Survey on Word Representation Models:
                 From Classical to State-of-the-Art Word Representation
                 Language Models},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {74:1--74:35},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3434237},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434237},
  abstract =     {Word representation has always been an important
                 research area in the history of natural language
                 processing (NLP). Understanding such complex text data
                 is imperative, given that it is rich in information and
                 can be used widely across various \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {74},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Ji:2021:OAP,
  author =       {Xiaowen Ji and Jincheng Ni},
  title =        {An {OT-ET} Analysis of {Polish} Singular--Plural
                 Pairs},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {75:1--75:12},
  month =        may,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3434238},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434238},
  abstract =     {Optimality Theory (OT) and Exemplar Theory (ET) are
                 two enchanting theories to many scholars, but each
                 still faces criticism and remaining persistent
                 problems. Application of both theories to areas in
                 linguistics where conflicts may arise has been
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {75},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Jing:2021:GGM,
  author =       {Weipeng Jing and Xianyang Song and Donglin Di and
                 Houbing Song},
  title =        {{geoGAT}: Graph Model Based on Attention Mechanism for
                 Geographic Text Classification},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {76:1--76:18},
  month =        sep,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3434239},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434239},
  abstract =     {In the area of geographic information processing,
                 there are few researches on geographic text
                 classification. However, the application of this task
                 in Chinese is relatively rare. In our work, we intend
                 to implement a method to extract text containing
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {76},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Bi:2021:BDL,
  author =       {Mingwen Bi and Qingchuan Zhang and Min Zuo and Zelong
                 Xu and Qingyu Jin},
  title =        {Bi-directional Long Short-Term Memory Model with
                 Semantic Positional Attention for the Question
                 Answering System},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {77:1--77:13},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3439800},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3439800},
  abstract =     {The intelligent question answering system aims to
                 provide quick and concise feedback on the questions of
                 users. Although the performance of phrase-level and
                 numerous attention models have been improved, the
                 sentence components and position information are
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {77},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Fan:2021:DNN,
  author =       {Xiaoqian Fan and Bowen Yang and Wenzhi Chen and
                 Quanfang Fan},
  title =        {Deep Neural Network Based Noised {Asian} Speech
                 Enhancement and Its Implementation on a Hearing Aid
                 App},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {78:1--78:14},
  month =        jul,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3439797},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3439797},
  abstract =     {This article studies noised Asian speech enhancement
                 based on the deep neural network (DNN) and its
                 implementation on an app. We use the THCHS-30 speech
                 dataset and the common noise dataset in daily life as
                 training and testing data of the DNN. To stack
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {78},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Zhao:2021:MOH,
  author =       {Chunhe Zhao and Balaanand Muthu and P. Mohamed
                 Shakeel},
  title =        {Multi-Objective Heuristic Decision Making and
                 Benchmarking for Mobile Applications in {English}
                 Language Learning},
  journal =      j-TALLIP,
  volume =       {20},
  number =       {5},
  pages =        {79:1--79:16},
  month =        sep,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3439799},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Oct 5 08:44:30 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3439799},
  abstract =     {This research proposes to evaluate and analyze the
                 decision matrix for learner's English mobile
                 applications (EMAs) based on multi-objective heuristic
                 decision making with a view to listening, speaking,
                 reading, and writing. Because of the number of
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {79},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

%%% TALLIP volume 20, number 5, article 80 (Gupta et al.).
@Article{Gupta:2021:TIC,
  author          = {Vedika Gupta and Nikita Jain and Shubham Shubham and
                     Agam Madan and Ankit Chaudhary and Qin Xin},
  title           = {Toward Integrated {CNN}-based Sentiment Analysis of
                     Tweets for Scarce-resource Language-{Hindi}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {80},
  pages           = {80:1--80:23},
  doi             = {https://doi.org/10.1145/3450447},
  url             = {https://dl.acm.org/doi/10.1145/3450447},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Linguistic resources for commonly used languages such
                     as English and Mandarin Chinese are available in
                     abundance, hence the existing research in these
                     languages. However, there are languages for which
                     linguistic resources are scarcely available. One of
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 81 (Do, Phan, Gupta).
@Article{Do:2021:DVT,
  author          = {Phuc Do and Truong H. V. Phan and Brij B. Gupta},
  title           = {Developing a {Vietnamese} Tourism Question Answering
                     System Using Knowledge Graph and Deep Learning},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jun,
  volume          = {20},
  number          = {5},
  articleno       = {81},
  pages           = {81:1--81:18},
  doi             = {https://doi.org/10.1145/3453651},
  url             = {https://dl.acm.org/doi/10.1145/3453651},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In recent years, Question Answering (QA) systems have
                     increasingly become very popular in many sectors. This
                     study aims to use a knowledge graph and deep learning
                     to develop a QA system for tourism in Vietnam. First,
                     the QA system replies to a user's \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 82 (Li).
@Article{Li:2021:REU,
  author          = {Meng Li},
  title           = {Research on Extraction of Useful Tourism Online
                     Reviews Based on Multimodal Feature Fusion},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = sep,
  volume          = {20},
  number          = {5},
  articleno       = {82},
  pages           = {82:1--82:16},
  doi             = {https://doi.org/10.1145/3453694},
  url             = {https://dl.acm.org/doi/10.1145/3453694},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {To effectively identify the influencing factors of the
                     perceived usefulness of multimodal data in online
                     reviews of tourism products, this article explores the
                     optimization method of online tourism products based on
                     user-generated content and conducts \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 83 (Sun, Xu, Liu).
@Article{Sun:2021:TCA,
  author          = {Lin Sun and Wenzheng Xu and Jimin Liu},
  title           = {Two-channel Attention Mechanism Fusion Model of Stock
                     Price Prediction Based on {CNN-LSTM}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {83},
  pages           = {83:1--83:12},
  doi             = {https://doi.org/10.1145/3453693},
  url             = {https://dl.acm.org/doi/10.1145/3453693},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Using hierarchical CNN, the company's multiple news is
                     characterized as three levels: sentence vectors,
                     chapter vectors, and enterprise sentiment vectors. By
                     combining the stock price data with the news lyric data
                     at the same time, the influence of news \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 84 (Jain, Saravanan, Pamula).
@Article{Jain:2021:HCL,
  author          = {Praphula Kumar Jain and Vijayalakshmi Saravanan and
                     Rajendra Pamula},
  title           = {A Hybrid {CNN-LSTM}: a Deep Learning Approach for
                     Consumer Sentiment Analysis Using Qualitative
                     User-Generated Contents},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {84},
  pages           = {84:1--84:15},
  doi             = {https://doi.org/10.1145/3457206},
  url             = {https://dl.acm.org/doi/10.1145/3457206},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {With the fastest growth of information and
                     communication technology (ICT), the availability of web
                     content on social media platforms is increasing day by
                     day. Sentiment analysis from online reviews drawing
                     researchers' attention from various organizations
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 85 (Deng, Rasinski).
@Article{Deng:2021:CCB,
  author          = {Fei Deng and Timothy V. Rasinski},
  title           = {A Computer Corpus-Based Study of {Chinese} {EFL}
                     Learners' Use of Adverbial Connectors and Its
                     Implications for Building a Language-Based Learning
                     Environment},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jun,
  volume          = {20},
  number          = {5},
  articleno       = {85},
  pages           = {85:1--85:16},
  doi             = {https://doi.org/10.1145/3457987},
  url             = {https://dl.acm.org/doi/10.1145/3457987},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {This research adopts the methodology of corpus-based
                     analysis and contrastive interlanguage analysis (CIA),
                     using three corpora as the data source to analyze the
                     adverbial connectors used by Chinese EFL (English as a
                     foreign language) learners (i.e., \ldots{})},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 86 (Deng, Zhang).
@Article{Deng:2021:CPC,
  author          = {Yongliang Deng and Hua Zhang},
  title           = {Configurational Path to {Chinese} Reading Stickiness
                     of Digital Library},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {86},
  pages           = {86:1--86:18},
  doi             = {https://doi.org/10.1145/3459092},
  url             = {https://dl.acm.org/doi/10.1145/3459092},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Attracting and retaining readers in an increasingly
                     competitive environment is an urgent problem for
                     digital libraries of original literature. However, few
                     empirical studies address online reading stickiness,
                     particularly the factors affecting the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 87 (Javed et al.).
@Article{Javed:2021:BSS,
  author          = {Abdul Rehman Javed and Saif Ur Rehman and Mohib Ullah
                     Khan and Mamoun Alazab and Habib Ullah Khan},
  title           = {{Betalogger}: Smartphone Sensor-based Side-channel
                     Attack Detection and Text Inference Using Language
                     Modeling and Dense {MultiLayer} Neural Network},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {87},
  pages           = {87:1--87:17},
  doi             = {https://doi.org/10.1145/3460392},
  url             = {https://dl.acm.org/doi/10.1145/3460392},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {With the recent advancement of smartphone technology
                     in the past few years, smartphone usage has increased
                     on a tremendous scale due to its portability and
                     ability to perform many daily life tasks. As a result,
                     smartphones have become one of the most \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 88 (Lavanya, Bharathi).
@Article{Lavanya:2021:MRS,
  author          = {R. Lavanya and B. Bharathi},
  title           = {Movie Recommendation System to Solve Data Sparsity
                     Using Collaborative Filtering Approach},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {88},
  pages           = {88:1--88:14},
  doi             = {https://doi.org/10.1145/3459091},
  url             = {https://dl.acm.org/doi/10.1145/3459091},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {With the increase in numbers of multimedia
                     technologies around us, movies and videos on social
                     media and OTT platforms are growing, making it
                     confusing for users to decide which one to watch for.
                     For this, movie recommendation systems are widely used.
                     It \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 89 (Ma, Yu, Xu, Deng).
@Article{Ma:2021:IAV,
  author          = {Jun Ma and Hongzhi Yu and Yan Xu and Kaiying Deng},
  title           = {An Investigational Approach for Vowels of the {Salar}
                     Language Based on a Database of Speech Acoustic
                     Parameters},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = sep,
  volume          = {20},
  number          = {5},
  articleno       = {89},
  pages           = {89:1--89:10},
  doi             = {https://doi.org/10.1145/3459927},
  url             = {https://dl.acm.org/doi/10.1145/3459927},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {According to relevant specifications, this article
                     divides, marks, and extracts the acquired speech
                     signals of the Salar language, and establishes the
                     speech acoustic parameter database of the Salar
                     language. Then, the vowels of the Salar language are
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 90 (Kumar, Albuquerque).
@Article{Kumar:2021:SAU,
  author          = {Akshi Kumar and Victor Hugo C. Albuquerque},
  title           = {Sentiment Analysis Using {XLM-R} Transformer and
                     Zero-shot Transfer Learning on Resource-poor {Indian}
                     Language},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {90},
  pages           = {90:1--90:13},
  doi             = {https://doi.org/10.1145/3461764},
  url             = {https://dl.acm.org/doi/10.1145/3461764},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Sentiment analysis on social media relies on
                     comprehending the natural language and using a robust
                     machine learning technique that learns multiple layers
                     of representations or features of the data and produces
                     state-of-the-art prediction results. The \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 5, article 91 (Zhou, Li, Yang).
@Article{Zhou:2021:NRO,
  author          = {Zhou Zhou and Fangmin Li and Shuiqiao Yang},
  title           = {A Novel Resource Optimization Algorithm Based on
                     Clustering and Improved Differential Evolution Strategy
                     Under a Cloud Environment},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = jul,
  volume          = {20},
  number          = {5},
  articleno       = {91},
  pages           = {91:1--91:15},
  doi             = {https://doi.org/10.1145/3462761},
  url             = {https://dl.acm.org/doi/10.1145/3462761},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Resource optimization algorithm based on clustering
                     and improved differential evolution strategy, as a new
                     global optimized algorithm, has wide applications in
                     language translation, language processing, document
                     understanding, cloud computing, and edge \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 5 08:44:30 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 92 (Tan, Jiang, Dai).
@Article{Tan:2021:BBT,
  author          = {Minghuan Tan and Jing Jiang and Bing Tian Dai},
  title           = {A {BERT}-Based Two-Stage Model for {Chinese Chengyu}
                     Recommendation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {92},
  pages           = {92:1--92:18},
  doi             = {https://doi.org/10.1145/3453185},
  url             = {https://dl.acm.org/doi/10.1145/3453185},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {In Chinese, Chengyu are fixed phrases consisting of
                     four characters. As a type of idioms, their meanings
                     usually cannot be derived from their component
                     characters. In this article, we study the task of
                     recommending a Chengyu given a textual context.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 93 (Xiang et al.).
@Article{Xiang:2021:RCL,
  author          = {Lu Xiang and Junnan Zhu and Yang Zhao and Yu Zhou and
                     Chengqing Zong},
  title           = {Robust Cross-lingual Task-oriented Dialogue},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {93},
  pages           = {93:1--93:24},
  doi             = {https://doi.org/10.1145/3457571},
  url             = {https://dl.acm.org/doi/10.1145/3457571},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Cross-lingual dialogue systems are increasingly
                     important in e-commerce and customer service due to the
                     rapid progress of globalization. In real-world system
                     deployment, machine translation (MT) services are often
                     used before and after the dialogue system \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 94 (Premjith, Soman).
@Article{Premjith:2021:DLA,
  author          = {B. Premjith and K. P. Soman},
  title           = {Deep Learning Approach for the Morphological Synthesis
                     in {Malayalam} and {Tamil} at the Character Level},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {94},
  pages           = {94:1--94:17},
  doi             = {https://doi.org/10.1145/3457976},
  url             = {https://dl.acm.org/doi/10.1145/3457976},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Morphological synthesis is one of the main components
                     of Machine Translation (MT) frameworks, especially when
                     any one or both of the source and target languages are
                     morphologically rich. Morphological synthesis is the
                     process of combining two words or two \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 95 (Mundotiya et al.).
@Article{Mundotiya:2021:LRB,
  author          = {Rajesh Kumar Mundotiya and Manish Kumar Singh and
                     Rahul Kapur and Swasti Mishra and Anil Kumar Singh},
  title           = {Linguistic Resources for {Bhojpuri}, {Magahi}, and
                     {Maithili}: Statistics about Them, Their Similarity
                     Estimates, and Baselines for Three Applications},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {95},
  pages           = {95:1--95:37},
  doi             = {https://doi.org/10.1145/3458250},
  url             = {https://dl.acm.org/doi/10.1145/3458250},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Corpus preparation for low-resource languages and for
                     development of human language technology to analyze or
                     computationally process them is a laborious task,
                     primarily due to the unavailability of expert linguists
                     who are native speakers of these \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 96 (Telemala, Suleman).
@Article{Telemala:2021:ETL,
  author          = {Joseph P. Telemala and Hussein Suleman},
  title           = {Exploring Topic-language Preferences in Multilingual
                     {Swahili} Information Retrieval in {Tanzania}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {96},
  pages           = {96:1--96:30},
  doi             = {https://doi.org/10.1145/3458671},
  url             = {https://dl.acm.org/doi/10.1145/3458671},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Habitual switching of languages is a common behaviour
                     among polyglots when searching for information on the
                     Web. Studies in information retrieval (IR) and
                     multilingual information retrieval (MLIR) suggest that
                     part of the reason for such regular switching
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 97 (Tian et al.).
@Article{Tian:2021:RRO,
  author          = {Yaolin Tian and Weize Gao and Xuxing Liu and Shanxiong
                     Chen and Bofeng Mo},
  title           = {The Research on Rejoining of the Oracle Bone Rubbings
                     Based on Curve Matching},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {97},
  pages           = {97:1--97:17},
  doi             = {https://doi.org/10.1145/3460393},
  url             = {https://dl.acm.org/doi/10.1145/3460393},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {The rejoining of oracle bone rubbings is a fundamental
                     topic for oracle research. However, it is a tough task
                     to reassemble severely broken oracle bone rubbings
                     because of detail loss in manual labeling, the great
                     time consumption of rejoining, and the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 98 (Munir, Zhao, Li).
@Article{Munir:2021:NUS,
  author          = {Kashif Munir and Hai Zhao and Zuchao Li},
  title           = {Neural Unsupervised Semantic Role Labeling},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {98},
  pages           = {98:1--98:16},
  doi             = {https://doi.org/10.1145/3461613},
  url             = {https://dl.acm.org/doi/10.1145/3461613},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {The task of semantic role labeling (SRL) is dedicated
                     to finding the predicate-argument structure. Previous
                     works on SRL are mostly supervised and do not consider
                     the difficulty in labeling each example which can be
                     very expensive and time-consuming. In \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 99 (Saha, Gupta, Saha, Bhattacharyya).
@Article{Saha:2021:UDM,
  author          = {Tulika Saha and Dhawal Gupta and Sriparna Saha and
                     Pushpak Bhattacharyya},
  title           = {A Unified Dialogue Management Strategy for
                     Multi-intent Dialogue Conversations in Multiple
                     Languages},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {99},
  pages           = {99:1--99:22},
  doi             = {https://doi.org/10.1145/3461763},
  url             = {https://dl.acm.org/doi/10.1145/3461763},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Building Virtual Agents capable of carrying out
                     complex queries of the user involving multiple intents
                     of a domain is quite a challenge, because it demands
                     that the agent manages several subtasks simultaneously.
                     This article presents a universal Deep \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 100 (Ni, Liu, Zeng, Xie).
@Article{Ni:2021:MDT,
  author          = {Weijian Ni and Tong Liu and Qingtian Zeng and Nengfu
                     Xie},
  title           = {Mining Domain Terminologies Using Search Engine's
                     Query Log},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {100},
  pages           = {100:1--100:32},
  doi             = {https://doi.org/10.1145/3462327},
  url             = {https://dl.acm.org/doi/10.1145/3462327},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Domain terminologies are a basic resource for various
                     natural language processing tasks. To automatically
                     discover terminologies for a domain of interest, most
                     traditional approaches mostly rely on a domain-specific
                     corpus given in advance; thus, the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 101 (Xu et al.).
@Article{Xu:2021:CDG,
  author          = {Jun Xu and Zeyang Lei and Haifeng Wang and Zheng-Yu
                     Niu and Hua Wu and Wanxiang Che and Jizhou Huang and
                     Ting Liu},
  title           = {Coherent Dialog Generation with Query Graph},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {101},
  pages           = {101:1--101:23},
  doi             = {https://doi.org/10.1145/3462551},
  url             = {https://dl.acm.org/doi/10.1145/3462551},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Learning to generate coherent and informative dialogs
                     is an enduring challenge for open-domain conversation
                     generation. Previous work leverage knowledge graph or
                     documents to facilitate informative dialog generation,
                     with little attention on dialog \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% TALLIP volume 20, number 6, article 102 (Joshi, Joshi, Mittal).
@Article{Joshi:2021:SSG,
  author          = {Manju Lata Joshi and Nisheeth Joshi and Namita
                     Mittal},
  title           = {{SGATS}: Semantic Graph-based Automatic Text
                     Summarization from {Hindi} Text Documents},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  year            = {2021},
  month           = nov,
  volume          = {20},
  number          = {6},
  articleno       = {102},
  pages           = {102:1--102:32},
  doi             = {https://doi.org/10.1145/3464381},
  url             = {https://dl.acm.org/doi/10.1145/3464381},
  journal-url     = {https://dl.acm.org/loi/tallip},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  abstract        = {Creating a coherent summary of the text is a
                     challenging task in the field of Natural Language
                     Processing (NLP). Various Automatic Text Summarization
                     techniques have been developed for abstractive as well
                     as extractive summarization. This study focuses on
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Byambadorj:2021:NTM,
  author          = {Zolzaya Byambadorj and Ryota Nishimura and Altangerel
                     Ayush and Norihide Kitaoka},
  title           = {Normalization of Transliterated {Mongolian} Words
                     Using {Seq2Seq} Model with Limited Data},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {103:1--103:19},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464361},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464361},
  abstract        = {The huge increase in social media use in recent years
                     has resulted in new forms of social interaction,
                     changing our daily lives. Due to increasing contact
                     between people from different cultures as a result of
                     globalization, there has also been an increase
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {103},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Kaing:2021:TTP,
  author          = {Hour Kaing and Chenchen Ding and Masao Utiyama and
                     Eiichiro Sumita and Sethserey Sam and Sopheap Seng and
                     Katsuhito Sudoh and Satoshi Nakamura},
  title           = {Towards Tokenization and Part-of-Speech Tagging for
                     {Khmer}: Data and Discussion},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {104:1--104:16},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464378},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464378},
  abstract        = {As a highly analytic language, Khmer has considerable
                     ambiguities in tokenization and part-of-speech (POS)
                     tagging processing. This topic is investigated in this
                     study. Specifically, a 20,000-sentence Khmer corpus
                     with manual tokenization and POS-tagging \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {104},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Tian:2021:NCM,
  author          = {Xiuxia Tian and Can Li and Bo Zhao},
  title           = {A Novel Classification Model {SA-MPCNN} for Power
                     Equipment Defect Text},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {105:1--105:21},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464380},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464380},
  abstract        = {The text classification of power equipment defect is
                     of great significance to equipment health condition
                     evaluation and power equipment maintenance decisions.
                     Most of the existing classification methods do not
                     sufficiently consider the semantic relation \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {105},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Sen:2021:BGT,
  author          = {Shibaprasad Sen and Ankan Bhattacharyya and Ram Sarkar
                     and Kaushik Roy},
  title           = {{BYANJON}: a Ground Truth Preparation System for
                     Online Handwritten {Bangla} Documents},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {106:1--106:16},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464379},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464379},
  abstract        = {The work reported in this article deals with the
                     ground truth generation scheme for online handwritten
                     Bangla documents at text-line, word, and stroke levels.
                     The aim of the proposed scheme is twofold: firstly, to
                     build a document level database so that \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {106},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Maimaiti:2021:IDA,
  author          = {Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan
                     and Zegao Pan and Maosong Sun},
  title           = {Improving Data Augmentation for Low-Resource {NMT}
                     Guided by {POS}-Tagging and Paraphrase Embedding},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {107:1--107:21},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464427},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464427},
  abstract        = {Data augmentation is an approach for several text
                     generation tasks. Generally, in the machine translation
                     paradigm, mainly in low-resource language scenarios,
                     many data augmentation methods have been proposed. The
                     most used approaches for generating \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {107},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Vo:2021:SIS,
  author          = {Tham Vo},
  title           = {{SE4ExSum}: an Integrated Semantic-aware Neural
                     Approach with Graph Convolutional Network for
                     Extractive Text Summarization},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {108:1--108:22},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464426},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464426},
  abstract        = {Recently, advanced techniques in deep learning such as
                     recurrent neural network (GRU, LSTM and Bi-LSTM) and
                     auto-encoding (attention-based transformer and BERT)
                     have achieved great successes in multiple application
                     domains including text summarization. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {108},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Liu:2021:MCS,
  author          = {Lei Liu and Hao Chen and Yinghong Sun},
  title           = {A Multi-Classification Sentiment Analysis Model of
                     {Chinese} Short Text Based on Gated Linear Units and
                     Attention Mechanism},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {109:1--109:13},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464425},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464425},
  abstract        = {Sentiment analysis of social media texts has become a
                     research hotspot in information processing. Sentiment
                     analysis methods based on the combination of machine
                     learning and sentiment lexicon need to select features.
                     Selected emotional features are often \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {109},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Niwa:2021:CCR,
  author          = {Ayana Niwa and Naoaki Okazaki and Kohei Wakimoto and
                     Keisuke Nishiguchi and Masataka Mouri},
  title           = {Construction of a Corpus of Rhetorical Devices in
                     Slogans and Structural Analysis of Antitheses},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {110:1--110:26},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3465218},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3465218},
  abstract        = {An advertising slogan is a sentence that expresses a
                     product or a work of art in a straightforward manner
                     and is used for advertising and publicity. Moving the
                     consumer's mind and attracting their interest can
                     significantly influence sales. Although \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {110},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Shin:2021:EEA,
  author          = {Jaehun Shin and Wonkee Lee and Byung-Hyun Go and
                     Baikjin Jung and Youngkil Kim and Jong-Hyeok Lee},
  title           = {Exploration of Effective Attention Strategies for
                     Neural Automatic Post-editing with Transformer},
  journal         = j-TALLIP,
  volume          = {20},
  number          = {6},
  pages           = {111:1--111:17},
  month           = nov,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3465383},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Sat Oct 16 05:29:47 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3465383},
  abstract        = {Automatic post-editing (APE) is the study of
                     correcting translation errors in the output of an
                     unknown machine translation (MT) system and has been
                     considered as a method of improving translation quality
                     without any modification to conventional MT \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {111},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Kumar:2022:ISI,
  author          = {Akshi Kumar and Christian Esposito and Dimitrios A.
                     Karras},
  title           = {Introduction to Special Issue on Misinformation, Fake
                     News and Rumor Detection in Low-Resource Languages},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {1e:1--1e:3},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3505588},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3505588},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {1e},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Sangwan:2022:DCD,
  author          = {Saurabh R. Sangwan and M. P. S. Bhatia},
  title           = {Denigrate Comment Detection in Low-Resource {Hindi}
                     Language Using Attention-Based Residual Networks},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {1:1--1:14},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3431729},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3431729},
  abstract        = {Cyberspace has been recognized as a conducive
                     environment for use of various hostile, direct, and
                     indirect behavioural tactics to target individuals or
                     groups. Denigration is one of the most frequently used
                     cyberbullying ploys to actively damage, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {1},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Bhowmick:2022:MDF,
  author          = {Rajat Subhra Bhowmick and Isha Ganguli and Jayanta
                     Paul and Jaya Sil},
  title           = {A Multimodal Deep Framework for Derogatory Social
                     Media Post Identification of a Recognized Person},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {2:1--2:19},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447651},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447651},
  abstract        = {In today's era of digitization, social media platforms
                     play a significant role in networking and influencing
                     the perception of the general population. Social
                     network sites have recently been used to carry out
                     harmful attacks against individuals, including
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {2},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@Article{Jain:2022:FNC,
  author =       "Rachna Jain and Deepak Kumar Jain and Dharana and
                 Nitika Sharma",
  title =        "Fake News Classification: a Quantitative Research
                 Description",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "1",
  pages =        "3:1--3:17",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447650",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Jan 31 07:33:24 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447650",
  abstract =     "Social media can render content circulating to reach
                 millions with a knack to influence people, despite the
                 questionable authenticity of the facts. Internet sources
                 are the most convenient and easy approach to obtain any
                 information these days. Fake news \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Ranasinghe:2022:MOL,
  author          = {Tharindu Ranasinghe and Marcos Zampieri},
  title           = {Multilingual Offensive Language Identification for
                     Low-resource Languages},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {4:1--4:13},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3457610},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3457610},
  abstract        = {Offensive content is pervasive in social media and a
                     reason for concern to companies and government
                     organizations. Several studies have been recently
                     published investigating methods to detect the various
                     forms of such content (e.g., hate speech, \ldots{})},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {4},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Das:2022:DLA,
  author          = {Soma Das and Pooja Rai and Sanjay Chatterji},
  title           = {Deep Level Analysis of Legitimacy in {Bengali} News
                     Sentences},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {5:1--5:18},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3459928},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3459928},
  abstract        = {The tremendous increase in the growth of
                     misinformation in news articles has the potential
                     threat for the adverse effects on society. Hence, the
                     detection of misinformation in news data has become an
                     appealing research area. The task of annotating and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {5},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Saeed:2022:ECE,
  author          = {Ramsha Saeed and Hammad Afzal and Haider Abbas and
                     Maheen Fatima},
  title           = {Enriching Conventional Ensemble Learner with Deep
                     Contextual Semantics to Detect Fake News in {Urdu}},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {6:1--6:19},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3461614},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3461614},
  abstract        = {Increased connectivity has contributed greatly in
                     facilitating rapid access to information and reliable
                     communication. However, the uncontrolled information
                     dissemination has also resulted in the spread of fake
                     news. Fake news might be spread by a group \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {6},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Gumaei:2022:EAR,
  author          = {Abdu Gumaei and Mabrook S. Al-Rakhami and Mohammad
                     Mehedi Hassan and Victor Hugo C. {De Albuquerque} and
                     David Camacho},
  title           = {An Effective Approach for Rumor Detection of {Arabic}
                     Tweets Using {eXtreme} Gradient Boosting Method},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {7:1--7:16},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3461697},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3461697},
  abstract        = {Twitter is currently one of the most popular
                     microblogging platforms allowing people to post short
                     messages, news, thoughts, and so on. The Twitter user
                     community is growing very fast. It has an average of
                     328 million active accounts today, making it one
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {7},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Dhall:2022:BBF,
  author          = {Sakshi Dhall and Ashutosh Dhar Dwivedi and Saibal K.
                     Pal and Gautam Srivastava},
  title           = {Blockchain-based Framework for Reducing Fake or
                     Vicious News Spread on Social Media\slash Messaging
                     Platforms},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {8:1--8:33},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3467019},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3467019},
  abstract        = {With social media becoming the most frequently used
                     mode of modern-day communications, the propagation of
                     fake or vicious news through such modes of
                     communication has emerged as a serious problem. The
                     scope of the problem of fake or vicious news may range
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {8},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{De:2022:TBA,
  author          = {Arkadipta De and Dibyanayan Bandyopadhyay and Baban
                     Gain and Asif Ekbal},
  title           = {A Transformer-Based Approach to Multilingual Fake News
                     Detection in Low-Resource Languages},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {9:1--9:20},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3472619},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3472619},
  abstract        = {Fake news classification is one of the most
                     interesting problems that has attracted huge attention
                     to the researchers of artificial intelligence, natural
                     language processing, and machine learning (ML). Most of
                     the current works on fake news detection are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {9},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Samadi:2022:PFN,
  author          = {Mohammadreza Samadi and Maryam Mousavian and Saeedeh
                     Momtazi},
  title           = {{Persian} Fake News Detection: Neural Representation
                     and Classification at Word and Text Levels},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {10:1--10:11},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3472620},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3472620},
  abstract        = {Nowadays, broadcasting news on social media and
                     websites has grown at a swifter pace, which has had
                     negative impacts on both the general public and
                     governments; hence, this has urged us to build a fake
                     news detection system. Contextualized word embeddings
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Najadat:2022:DAS,
  author          = {Hassan Najadat and Mohammad A. Alzubaidi and Islam
                     Qarqaz},
  title           = {Detecting {Arabic} Spam Reviews in Social Networks
                     Based on Classification Algorithms},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {11:1--11:13},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3476115},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3476115},
  abstract        = {Reviews or comments that users leave on social media
                     have great importance for companies and business
                     entities. New product ideas can be evaluated based on
                     customer reactions. However, this use of social media
                     is complicated by those who post spam on \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Jahanbakhsh-Nagadeh:2022:DCB,
  author          = {Zoleikha Jahanbakhsh-Nagadeh and Mohammad-Reza
                     Feizi-Derakhshi and Arash Sharifi},
  title           = {A Deep Content-Based Model for {Persian} Rumor
                     Verification},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {12:1--12:29},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3487289},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3487289},
  abstract        = {During the development of social media, there has been
                     a transformation in social communication. Despite their
                     positive applications in social interactions and news
                     spread, it also provides an ideal platform for
                     spreading rumors. Rumors can endanger the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Alam:2022:RUP,
  author          = {Mehreen Alam and Sibt {Ul Hussain}},
  title           = {{Roman--Urdu--Parl}: {Roman--Urdu} and {Urdu} Parallel
                     Corpus for {Urdu} Language Understanding},
  journal         = j-TALLIP,
  volume          = {21},
  number          = {1},
  pages           = {13:1--13:20},
  month           = jan,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3464424},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url             = {https://dl.acm.org/doi/10.1145/3464424},
  abstract        = {Availability of corpora is a basic requirement for
                     conducting research in a particular language.
                     Unfortunately, for a morphologically rich language like
                     Urdu, despite being used by over a 100 million people
                     around the globe, the dearth of corpora is a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
}

@article{Nassif:2022:EES,
  author          = {Ali Bou Nassif and Abdollah Masoud Darya and Ashraf
                     Elnagar},
  title           = {Empirical Evaluation of Shallow and Deep Learning
                     Classifiers for {Arabic} Sentiment Analysis},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {14:1--14:25},
  articleno       = {14},
  doi             = {https://doi.org/10.1145/3466171},
  url             = {https://dl.acm.org/doi/10.1145/3466171},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {This work presents a detailed comparison of the
                     performance of deep learning models such as
                     convolutional neural networks, long short-term memory,
                     gated recurrent units, their hybrids, and a selection
                     of shallow learning classifiers for sentiment analysis
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Phukon:2022:SEU,
  author          = {Bornali Phukon and Akash Anil and Sanasam Ranbir Singh
                     and Priyankoo Sarmah},
  title           = {Synonymy Expansion Using Link Prediction Methods: a
                     Case Study of {Assamese} {WordNet}},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {15:1--15:21},
  articleno       = {15},
  doi             = {https://doi.org/10.1145/3467966},
  url             = {https://dl.acm.org/doi/10.1145/3467966},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {WordNets built for low-resource languages, such as
                     Assamese, often use the expansion methodology. This may
                     result in missing lexical entries and missing synonymy
                     relations. As the Assamese WordNet is also built using
                     the expansion method, using the Hindi \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Eddine:2022:NCE,
  author          = {Meftah Mohammed Charaf Eddine},
  title           = {A New Concept of Electronic Text Based on Semantic
                     Coding System for Machine Translation},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {16:1--16:16},
  articleno       = {16},
  doi             = {https://doi.org/10.1145/3469655},
  url             = {https://dl.acm.org/doi/10.1145/3469655},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {In the field of machine translation of texts, the
                     ambiguity in both lexical (dictionary) and structural
                     aspects is still one of the difficult problems.
                     Researchers in this field use different approaches, the
                     most important of which is machine learning in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Xiang:2022:EGN,
  author          = {Yan Xiang and Zhengtao Yu and Junjun Guo and Yuxin
                     Huang and Yantuan Xian},
  title           = {Event Graph Neural Network for Opinion Target
                     Classification of Microblog Comments},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {17:1--17:13},
  articleno       = {17},
  doi             = {https://doi.org/10.1145/3469725},
  url             = {https://dl.acm.org/doi/10.1145/3469725},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Opinion target classification of microblog comments is
                     one of the most important tasks for public opinion
                     analysis about an event. Due to the high cost of manual
                     labeling, opinion target classification is generally
                     considered as a weak-supervised task. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Turan:2022:CIA,
  author          = {Erhan Turan and Umut Orhan},
  title           = {Confidence Indexing of Automated Detected Synsets: a
                     Case Study on Contemporary {Turkish} Dictionary},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {18:1--18:19},
  articleno       = {18},
  doi             = {https://doi.org/10.1145/3469724},
  url             = {https://dl.acm.org/doi/10.1145/3469724},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {In this study, a novel confidence indexing algorithm
                     is proposed to minimize human labor in controlling the
                     reliability of automatically extracted synsets from a
                     non-machine-readable monolingual dictionary.
                     Contemporary Turkish Dictionary of Turkish \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Baruah:2022:LRN,
  author          = {Rupjyoti Baruah and Rajesh Kumar Mundotiya and Anil
                     Kumar Singh},
  title           = {Low Resource Neural Machine Translation: {Assamese}
                     to\slash from Other {Indo--Aryan} ({Indic}) Languages},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {19:1--19:32},
  articleno       = {19},
  doi             = {https://doi.org/10.1145/3469721},
  url             = {https://dl.acm.org/doi/10.1145/3469721},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Machine translation (MT) systems have been built using
                     numerous different techniques for bridging the language
                     barriers. These techniques are broadly categorized into
                     approaches like Statistical Machine Translation (SMT)
                     and Neural Machine Translation (\ldots{})},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Fadel:2022:NAT,
  author          = {Ali Fadel and Ibraheem Tuffaha and Mahmoud Al-Ayyoub},
  title           = {Neural {Arabic} Text Diacritization: State-of-the-Art
                     Results and a Novel Approach for {Arabic} {NLP}
                     Downstream Tasks},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {20:1--20:25},
  articleno       = {20},
  doi             = {https://doi.org/10.1145/3470849},
  url             = {https://dl.acm.org/doi/10.1145/3470849},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {In this work, we present several deep learning models
                     for the automatic diacritization of Arabic text. Our
                     models are built using two main approaches, viz.
                     Feed-Forward Neural Network (FFNN) and Recurrent Neural
                     Network (RNN), with several enhancements \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Kulkarni:2022:SAH,
  author          = {Dhanashree S. Kulkarni and Sunil S. Rodd},
  title           = {Sentiment Analysis in {Hindi} --- a Survey on the
                     State-of-the-art Techniques},
  journal         = j-TALLIP,
  year            = {2022},
  month           = jan,
  volume          = {21},
  number          = {1},
  pages           = {21:1--21:46},
  articleno       = {21},
  doi             = {https://doi.org/10.1145/3469722},
  url             = {https://dl.acm.org/doi/10.1145/3469722},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Sentiment Analysis (SA) has been a core interest in
                     the field of text mining research, dealing with
                     computational processing of sentiments, views, and
                     subjective nature of the text. Due to the availability
                     of extensive web-based data in Indian languages
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 31 07:33:24 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Yu:2022:ICV,
  author =       "Zhiqiang Yu and Zhengtao Yu and Yantuan Xian and Yuxin
                 Huang and Junjun Guo",
  title =        "Improving {Chinese--Vietnamese} Neural Machine
                 Translation with Linguistic Differences",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "22:1--22:12",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3477536",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477536",
  abstract =     "We present a simple, efficient data augmentation
                 approach for boosting Chinese--Vietnamese neural
                 machine translation performance by leveraging the
                 linguistic difference between the two languages. We
                 first define the formalized representation of modifier
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Halabi:2022:INA,
  author          = {Dana Halabi and Ebaa Fayyoumi and Arafat Awajan},
  title           = {{I3rab}: a New {Arabic} Dependency Treebank Based on
                     {Arabic} Grammatical Theory},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {23:1--23:32},
  articleno       = {23},
  doi             = {https://doi.org/10.1145/3472295},
  url             = {https://dl.acm.org/doi/10.1145/3472295},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Treebanks are valuable linguistic resources that
                     include the syntactic structure of a language sentence
                     in addition to part-of-speech tags and morphological
                     features. They are mainly utilized in modeling
                     statistical parsers. Although the statistical
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Yang:2022:ASN,
  author          = {Haitong Yang and Guangyou Zhou and Tingting He},
  title           = {Adversarial Separation Network for Text Style
                     Transfer},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {24:1--24:14},
  articleno       = {24},
  doi             = {https://doi.org/10.1145/3472621},
  url             = {https://dl.acm.org/doi/10.1145/3472621},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {This article considers the task of text style
                     transfer: transforming a specific style of sentence
                     into another while preserving its style-independent
                     content. A dominate approach to text style transfer is
                     to learn a good content factor of text, define a
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Fatima:2022:DCL,
  author          = {Ghazeefa Fatima and Rao Muhammad Adeel Nawab and
                     Muhammad Salman Khan and Ali Saeed},
  title           = {Developing a Cross-lingual Semantic Word Similarity
                     Corpus for {English--Urdu} Language Pair},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {25:1--25:16},
  articleno       = {25},
  doi             = {https://doi.org/10.1145/3472618},
  url             = {https://dl.acm.org/doi/10.1145/3472618},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Semantic word similarity is a quantitative measure of
                     how much two words are contextually similar. Evaluation
                     of semantic word similarity models requires a benchmark
                     corpus. However, despite the millions of speakers and
                     the large digital text of the Urdu \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Muneer:2022:CLT,
  author =       "Iqra Muneer and Rao Muhammad Adeel Nawab",
  title =        "Cross-lingual Text Reuse Detection Using Translation
                 Plus Monolingual Analysis for {English--Urdu} Language
                 Pair",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "26:1--26:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473331",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473331",
  abstract =     "Cross-Lingual Text Reuse Detection (CLTRD) has
                 recently attracted the attention of the research
                 community due to a large amount of digital text readily
                 available for reuse in multiple languages through
                 online digital repositories. In addition, efficient
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@article{Xu:2022:LRL,
  author          = {Fan Xu and Yangjie Dan and Keyu Yan and Yong Ma and
                     Mingwen Wang},
  title           = {Low-Resource Language Discrimination toward {Chinese}
                     Dialects with Transfer Learning and Data Augmentation},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {27:1--27:21},
  articleno       = {27},
  doi             = {https://doi.org/10.1145/3473499},
  url             = {https://dl.acm.org/doi/10.1145/3473499},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Chinese dialects discrimination is a challenging
                     natural language processing task due to scarce
                     annotation resource. In this article, we develop a
                     novel Chinese dialects discrimination framework with
                     transfer learning and data augmentation (CDDTLDA) in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Rana:2022:UAS,
  author          = {Toqir A. Rana and Kiran Shahzadi and Tauseef Rana and
                     Ahsan Arshad and Mohammad Tubishat},
  title           = {An Unsupervised Approach for Sentiment Analysis on
                     Social Media Short Text Classification in {Roman
                     Urdu}},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {28:1--28:16},
  articleno       = {28},
  doi             = {https://doi.org/10.1145/3474119},
  url             = {https://dl.acm.org/doi/10.1145/3474119},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {During the last two decades, sentiment analysis, also
                     known as opinion mining, has become one of the most
                     explored research areas in Natural Language Processing
                     (NLP) and data mining. Sentiment analysis focuses on
                     the sentiments or opinions of consumers \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Mukherjee:2022:URL,
  author          = {Jayati Mukherjee and Swapan K. Parui and Utpal Roy},
  title           = {An Unsupervised and Robust Line and Word Segmentation
                     Method for Handwritten and Degraded Printed Document},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {29:1--29:31},
  articleno       = {29},
  doi             = {https://doi.org/10.1145/3474118},
  url             = {https://dl.acm.org/doi/10.1145/3474118},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Segmentation of text lines and words in an
                     unconstrained handwritten or a machine-printed degraded
                     document is a challenging document analysis problem due
                     to the heterogeneity in the document structure. Often
                     there is un-even skew between the lines and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Mukta:2022:CGB,
  author          = {Md. Saddam Hossain Mukta and Md. Adnanul Islam and
                     Faisal Ahamed Khan and Afjal Hossain and Shuvanon Razik
                     and Shazzad Hossain and Jalal Mahmud},
  title           = {A Comprehensive Guideline for {Bengali} Sentiment
                     Annotation},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {30:1--30:19},
  articleno       = {30},
  doi             = {https://doi.org/10.1145/3474363},
  url             = {https://dl.acm.org/doi/10.1145/3474363},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Sentiment Analysis (SA) is a Natural Language
                     Processing (NLP) and an Information Extraction (IE)
                     task that primarily aims to obtain the writer's
                     feelings expressed in positive or negative by analyzing
                     a large number of documents. SA is also widely
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Hu:2022:GFQ,
  author          = {Yue Hu and Haitong Yang and Guangyou Zhou and Jimmy
                     Xiangji Huang},
  title           = {Generating Factoid Questions with Question Type
                     Enhanced Representation and Attention-based Copy
                     Mechanism},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {31:1--31:18},
  articleno       = {31},
  doi             = {https://doi.org/10.1145/3474555},
  url             = {https://dl.acm.org/doi/10.1145/3474555},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Question generation over knowledge bases is an
                     important research topic. How to deal with rare and
                     low-frequency words in traditional generation models is
                     a key challenge for question generation. Although the
                     copy mechanism provides significant \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Singh:2022:CSH,
  author          = {Pawan Kumar Singh and Ram Sarkar and Ajith Abraham and
                     Mita Nasipuri},
  title           = {A Case Study on Handwritten {Indic} Script
                     Classification: Benchmarking of the Results at Page,
                     Block, Text-line, and Word Levels},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {32:1--32:36},
  articleno       = {32},
  doi             = {https://doi.org/10.1145/3476102},
  url             = {https://dl.acm.org/doi/10.1145/3476102},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Handwritten script classification is still considered
                     as a challenging research problem in the domain of
                     document image analysis. Although some research
                     attempts have been made by the researchers for solving
                     the challenging issues, a comprehensive \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Gu:2022:MTF,
  author          = {Xiaoqing Gu and Kaijian Xia and Yizhang Jiang and
                     Alireza Jolfaei},
  title           = {Multi-task Fuzzy Clustering-Based Multi-task {TSK}
                     Fuzzy System for Text Sentiment Classification},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {33:1--33:24},
  articleno       = {33},
  doi             = {https://doi.org/10.1145/3476103},
  url             = {https://dl.acm.org/doi/10.1145/3476103},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Text sentiment classification is an important
                     technology for natural language processing. A fuzzy
                     system is a strong tool for processing imprecise or
                     ambiguous data, and it can be used for text sentiment
                     analysis. This article proposes a new formulation
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Sarwar:2022:UWU,
  author          = {Raheem Sarwar and Saeed-Ul Hassan},
  title           = {{UrduAI}: Writeprints for {Urdu} Authorship
                     Identification},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {34:1--34:18},
  articleno       = {34},
  doi             = {https://doi.org/10.1145/3476467},
  url             = {https://dl.acm.org/doi/10.1145/3476467},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {The authorship identification task aims at identifying
                     the original author of an anonymous text sample from a
                     set of candidate authors. It has several application
                     domains such as digital text forensics and information
                     retrieval. These application domains \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Fatima:2022:SAW,
  author          = {Tayyaba Fatima and Raees {Ul Islam} and Muhammad Waqas
                     Anwar and M. Hasan Jamal and M. Tayyab Chaudhry and
                     Zeeshan Gillani},
  title           = {{STEMUR}: an Automated Word Conflation Algorithm for
                     the {Urdu} Language},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {35:1--35:20},
  articleno       = {35},
  doi             = {https://doi.org/10.1145/3476226},
  url             = {https://dl.acm.org/doi/10.1145/3476226},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {Stemming is a common word conflation method that
                     perceives stems embedded in the words and decreases
                     them to their stem (root) by conflating all the
                     morphologically related terms into a single term,
                     without doing a complete morphological analysis. This
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@article{Hiraoka:2022:RNH,
  author          = {Tatsuya Hiraoka and Sho Takase and Kei Uchiumi and
                     Atsushi Keyaki and Naoaki Okazaki},
  title           = {Recurrent Neural Hidden {Markov} Model for High-order
                     Transition},
  journal         = j-TALLIP,
  year            = {2022},
  month           = mar,
  volume          = {21},
  number          = {2},
  pages           = {36:1--36:15},
  articleno       = {36},
  doi             = {https://doi.org/10.1145/3476511},
  url             = {https://dl.acm.org/doi/10.1145/3476511},
  issn            = {2375-4699 (print), 2375-4702 (electronic)},
  issn-l          = {2375-4699},
  coden           = {????},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  journal-url     = {https://dl.acm.org/loi/tallip},
  abstract        = {We propose a method to pay attention to high-order
                     relations among latent states to improve the
                     conventional HMMs that focus only on the latest latent
                     state, since they assume Markov property. To address
                     the high-order relations, we apply an RNN to each
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{S:2022:IWS,
  author          = {Sruthi S. and B. Kannan and Binu Paul},
  title           = {Improved Word Sense Determination in {Malayalam} using
                     Latent {Dirichlet} Allocation and Semantic Features},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {37},
  pages           = {37:1--37:11},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3476978},
  URL             = {https://dl.acm.org/doi/10.1145/3476978},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Recent years have witnessed phenomenal developments
                     worldwide in the field of NLP. But developments in
                     Indian regional languages are very few compared to
                     them. This work is a step towards the construction of a
                     target word sense disambiguation system in \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Saeed:2022:IFD,
  author          = {Ali Saeed and Rao Muhammad Adeel Nawab and Mark
                     Stevenson},
  title           = {Investigating the Feasibility of Deep Learning Methods
                     for {Urdu} Word Sense Disambiguation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {38},
  pages           = {38:1--38:16},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3477578},
  URL             = {https://dl.acm.org/doi/10.1145/3477578},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Word Sense Disambiguation (WSD), the process of
                     automatically identifying the correct meaning of a word
                     used in a given context, is a significant challenge in
                     Natural Language Processing. A range of approaches to
                     the problem has been explored by the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Katyayan:2022:DAR,
  author          = {Pragya Katyayan and Nisheeth Joshi},
  title           = {Development of Automatic Rule-based Semantic Tagger
                     and {Karaka} Analyzer for {Hindi}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {39},
  pages           = {39:1--39:25},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3479155},
  URL             = {https://dl.acm.org/doi/10.1145/3479155},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Hindi is the third most-spoken language in the world
                     (615 million speakers) and has the fourth highest
                     native speakers (341 million). It is an inflectionally
                     rich and relatively free word-order language with an
                     immense vocabulary set. Despite being such a \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Abbad:2022:SED,
  author          = {Hamza Abbad and Shengwu Xiong},
  title           = {Simple Extensible Deep Learning Model for Automatic
                     {Arabic} Diacritization},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {40},
  pages           = {40:1--40:16},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3480938},
  URL             = {https://dl.acm.org/doi/10.1145/3480938},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Automatic diacritization is an Arabic natural language
                     processing topic based on the sequence labeling task
                     where the labels are the diacritics and the letters are
                     the sequence elements. A letter can have from zero up
                     to two diacritics. The dataset used \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Huang:2022:DAW,
  author          = {Kaiyu Huang and Keli Xiao and Fengran Mo and Bo Jin
                     and Zhuang Liu and Degen Huang},
  title           = {Domain-Aware Word Segmentation for {Chinese} Language:
                     a Document-Level Context-Aware Model},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {41},
  pages           = {41:1--41:16},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3481298},
  URL             = {https://dl.acm.org/doi/10.1145/3481298},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Word segmentation is an essential and challenging task
                     in natural language processing, especially for the
                     Chinese language due to its high linguistic complexity.
                     Existing methods for Chinese word segmentation,
                     including statistical machine learning \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Bai:2022:UPT,
  author          = {Guirong Bai and Shizhu He and Kang Liu and Jun Zhao},
  title           = {Using Pre-trained Language Model to Enhance Active
                     Learning for Sentence Matching},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {2},
  articleno       = {42},
  pages           = {42:1--42:19},
  month           = mar,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3480937},
  URL             = {https://dl.acm.org/doi/10.1145/3480937},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Active learning is an effective method to
                     substantially alleviate the problem of expensive
                     annotation cost for data-driven models. Recently,
                     pre-trained language models have been demonstrated to
                     be powerful for learning language representations. In
                     this \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Mar 28 11:35:36 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Al-Shatnawi:2022:AHW,
  author          = {Atallah Mahmoud Al-Shatnawi and Faisal Al-Saqqar and
                     Alireza Souri},
  title           = {{Arabic} Handwritten Word Recognition Based on
                     Stationary Wavelet Transform Technique using Machine
                     Learning},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {43},
  pages           = {43:1--43:21},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3474391},
  URL             = {https://dl.acm.org/doi/10.1145/3474391},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {This paper is aimed at improving the performance of
                     the word recognition system (WRS) of handwritten Arabic
                     text by extracting features in the frequency domain
                     using the Stationary Wavelet Transform (SWT) method
                     using machine learning, which is a wavelet \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Nazir:2022:AAR,
  author          = {Zulqarnain Nazir and Khurram Shahzad and Muhammad
                     Kamran Malik and Waheed Anwar and Imran Sarwar Bajwa
                     and Khawar Mehmood},
  title           = {Authorship Attribution for a Resource Poor Language
                     --- {Urdu}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {44},
  pages           = {44:1--44:23},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3487061},
  URL             = {https://dl.acm.org/doi/10.1145/3487061},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Authorship attribution refers to examining the writing
                     style of authors to determine the likelihood of the
                     original author of a document from a given set of
                     potential authors. Due to the wide range of authorship
                     attribution applications, a plethora of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Sheikhaei:2022:JTL,
  author          = {Mohammad Sadegh Sheikhaei and Hasan Zafari and Yuan
                     Tian},
  title           = {Joined Type Length Encoding for Nested Named Entity
                     Recognition},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {45},
  pages           = {45:1--45:23},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3487057},
  URL             = {https://dl.acm.org/doi/10.1145/3487057},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {In this article, we propose a new encoding scheme for
                     named entity recognition (NER) called Joined
                     Type-Length encoding (JoinedTL). Unlike most existing
                     named entity encoding schemes, which focus on flat
                     entities, JoinedTL can label nested named entities
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Li:2022:DVC,
  author          = {Mei Li and Jiajun Zhang and Xiang Lu and Chengqing
                     Zong},
  title           = {Dual-View Conditional Variational Auto-Encoder for
                     Emotional Dialogue Generation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {46},
  pages           = {46:1--46:18},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3481890},
  URL             = {https://dl.acm.org/doi/10.1145/3481890},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Emotional dialogue generation aims to generate
                     appropriate responses that are content relevant with
                     the query and emotion consistent with the given emotion
                     tag. Previous work mainly focuses on incorporating
                     emotion information into the sequence to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Raval:2022:IDL,
  author          = {Deepang Raval and Vyom Pathak and Muktan Patel and
                     Brijesh Bhatt},
  title           = {Improving Deep Learning based Automatic Speech
                     Recognition for {Gujarati}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {47},
  pages           = {47:1--47:18},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3483446},
  URL             = {https://dl.acm.org/doi/10.1145/3483446},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {We present a novel approach for improving the
                     performance of an End-to-End speech recognition system
                     for the Gujarati language. We follow a deep
                     learning-based approach that includes Convolutional
                     Neural Network, Bi-directional Long Short Term Memory
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Jiang:2022:TTD,
  author          = {Shu Jiang and Zuchao Li and Hai Zhao and Bao-Liang Lu
                     and Rui Wang},
  title           = {Tri-training for Dependency Parsing Domain
                     Adaptation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {48},
  pages           = {48:1--48:17},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3488367},
  URL             = {https://dl.acm.org/doi/10.1145/3488367},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {In recent years, the research on dependency parsing
                     focuses on improving the accuracy of the
                     domain-specific (in-domain) test datasets and has made
                     remarkable progress. However, there are innumerable
                     scenarios in the real world that are not covered by the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Mishra:2022:ECA,
  author          = {Santosh Kumar Mishra and Gaurav Rai and Sriparna Saha
                     and Pushpak Bhattacharyya},
  title           = {Efficient Channel Attention Based Encoder-Decoder
                     Approach for Image Captioning in {Hindi}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {49},
  pages           = {49:1--49:17},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3483597},
  URL             = {https://dl.acm.org/doi/10.1145/3483597},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Image captioning refers to the process of generating a
                     textual description that describes objects and
                     activities present in a given image. It connects two
                     fields of artificial intelligence, computer vision, and
                     natural language processing. Computer vision \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Liao:2022:SLM,
  author          = {Xianwen Liao and Yongzhong Huang and Peng Yang and Lei
                     Chen},
  title           = {A Statistical Language Model for Pre-Trained Sequence
                     Labeling: a Case Study on {Vietnamese}},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {50},
  pages           = {50:1--50:21},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3483524},
  URL             = {https://dl.acm.org/doi/10.1145/3483524},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {By defining the computable word segmentation unit and
                     studying its probability characteristics, we establish
                     an unsupervised statistical language model (SLM) for a
                     new pre-trained sequence labeling framework in this
                     article. The proposed SLM is an \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Wang:2022:TCC,
  author          = {Zhongguo Wang and Bao Zhang},
  title           = {Toxic Comment Classification Based on Bidirectional
                     Gated Recurrent Unit and Convolutional Neural Network},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {51},
  pages           = {51:1--51:12},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3488366},
  URL             = {https://dl.acm.org/doi/10.1145/3488366},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {For English toxic comment classification, this paper
                     presents the model that combines Bi-GRU and CNN
                     optimized by global average pooling (BG-GCNN) based on
                     the bidirectional gated recurrent unit (Bi-GRU) and
                     global pooling optimized convolution neural \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Sun:2022:OSR,
  author          = {Jian Sun and Yu Zhou and Chengqing Zong},
  title           = {One-Shot Relation Learning for Knowledge Graphs via
                     Neighborhood Aggregation and Paths Encoding},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {52},
  pages           = {52:1--52:19},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3484729},
  URL             = {https://dl.acm.org/doi/10.1145/3484729},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {The relation learning between two entities is an
                     essential task in knowledge graph (KG) completion that
                     has received much attention recently. Previous work
                     almost exclusively focused on relations widely seen in
                     the original KGs, which means that enough \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Munir:2022:MAI,
  author          = {Kashif Munir and Hongxiao Bai and Hai Zhao and Junhan
                     Zhao},
  title           = {Memorizing All for Implicit Discourse Relation
                     Recognition},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {53},
  pages           = {53:1--53:20},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3485016},
  URL             = {https://dl.acm.org/doi/10.1145/3485016},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Implicit discourse relation recognition is a
                     challenging task due to the absence of the necessary
                     informative clues from explicit connectives. An
                     implicit discourse relation recognizer has to carefully
                     tackle the semantic similarity of sentence pairs and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Reddy:2022:FBA,
  author          = {A. Pramod Reddy and Vijayarajan V.},
  title           = {Fusion Based {AER} System Using Deep Learning Approach
                     for Amplitude and Frequency Analysis},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {54},
  pages           = {54:1--54:19},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3488369},
  URL             = {https://dl.acm.org/doi/10.1145/3488369},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Automatic emotion recognition from Speech (AERS)
                     systems based on acoustical analysis reveal that some
                     emotional classes persist with ambiguity. This study
                     employed an alternative method aimed at providing deep
                     understanding into the amplitude-frequency, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Zhang:2022:LJE,
  author          = {Hu Zhang and Bangze Pan and Ru Li},
  title           = {Legal Judgment Elements Extraction Approach with Law
                     Article-aware Mechanism},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {55},
  pages           = {55:1--55:15},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3485244},
  URL             = {https://dl.acm.org/doi/10.1145/3485244},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Legal judgment elements extraction (LJEE) aims to
                     identify the different judgment features from the fact
                     description in legal documents automatically, which
                     helps to improve the accuracy and interpretability of
                     the judgment results. In real court rulings, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Rani:2022:ABS,
  author          = {Sujata Rani and Parteek Kumar},
  title           = {Aspect-based Sentiment Analysis using Dependency
                     Parsing},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {56},
  pages           = {56:1--56:19},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3485243},
  URL             = {https://dl.acm.org/doi/10.1145/3485243},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {In this paper, an aspect-based Sentiment Analysis (SA)
                     system for Hindi is presented. The proposed system
                     assigns a separate sentiment towards the different
                     aspects of a sentence as well as it evaluates the
                     overall sentiment expressed in a sentence. In
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

%%% Title note: ``Indo-Aryan'' is a single hyphenated compound (combining
%%% form ``Indo-'' plus ``Aryan''), not a pairing of two names, so it is
%%% written with a hyphen rather than an en dash; this also agrees with
%%% the spelling used in the abstract of this entry.
@Article{Ishraq:2022:TDU,
  author =       "Mir Ragib Ishraq and Nitesh Khadka and Asif Mohammed
                 Samir and M. Shahidur Rahman",
  title =        "Towards Developing Uniform Lexicon Based Sorting
                 Algorithm for Three Prominent {Indo-Aryan} Languages",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "57:1--57:20",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488371",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488371",
  abstract =     "Three different Indic/Indo-Aryan languages ---
                 Bengali, Hindi and Nepali have been explored here in
                 character level to find out similarities and
                 dissimilarities. Having shared the same root, the
                 Sanskrit, Indic languages bear common characteristics.
                 That is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Pandey:2022:HAS,
  author          = {Shilpa Pandey and Gaurav Harit},
  title           = {Handwritten Annotation Spotting in Printed Documents
                     Using Top-Down Visual Saliency Models},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {58},
  pages           = {58:1--58:25},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3485468},
  URL             = {https://dl.acm.org/doi/10.1145/3485468},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {In this article, we address the problem of localizing
                     text and symbolic annotations on the scanned image of a
                     printed document. Previous approaches have considered
                     the task of annotation extraction as binary
                     classification into printed and handwritten \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Kang:2022:ELT,
  author          = {Xiaomian Kang and Yang Zhao and Jiajun Zhang and
                     Chengqing Zong},
  title           = {Enhancing Lexical Translation Consistency for
                     Document-Level Neural Machine Translation},
  journal         = j-TALLIP,
  fjournal        = {ACM Transactions on Asian and Low-Resource Language
                     Information Processing (TALLIP)},
  ajournal        = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  journal-URL     = {https://dl.acm.org/loi/tallip},
  volume          = {21},
  number          = {3},
  articleno       = {59},
  pages           = {59:1--59:21},
  month           = may,
  year            = {2022},
  DOI             = {https://doi.org/10.1145/3485469},
  URL             = {https://dl.acm.org/doi/10.1145/3485469},
  CODEN           = {????},
  ISSN            = {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L          = {2375-4699},
  abstract        = {Document-level neural machine translation (DocNMT) has
                     yielded attractive improvements. In this article, we
                     systematically analyze the discourse phenomena in
                     Chinese-to-English translation, and focus on the most
                     obvious ones, namely lexical translation \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Apr 5 06:29:03 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
}

@Article{Azmi:2022:LDR,
  author =       {Aqil M. Azmi and Rehab M. Alnefaie and Hatim A.
                 Aboalsamh},
  title =        {Light Diacritic Restoration to Disambiguate Homographs
                 in Modern {Arabic} Texts},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {3},
  pages =        {60:1--60:14},
  month =        may,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3486675},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Apr 5 06:29:03 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3486675},
  abstract =     {Diacritic restoration (also known as diacritization or
                 vowelization) is the process of inserting the correct
                 diacritical markings into a text. Modern Arabic is
                 typically written without diacritics, e.g., newspapers.
                 This lack of diacritical markings often \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {60},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Guo:2022:ACD,
  author =       {Aibo Guo and Xinyi Li and Ning Pang and Xiang Zhao},
  title =        {Adversarial Cross-domain Community Question
                 Retrieval},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {3},
  pages =        {61:1--61:22},
  month =        may,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3487291},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Apr 5 06:29:03 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3487291},
  abstract =     {Community Q\&A forum is a special type of social media
                 that provides a platform to raise questions and to
                 answer them (both by forum participants), to facilitate
                 online information sharing. Currently, community Q\&A
                 forums in professional domains have \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {61},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wang:2022:CSS,
  author =       {Shaolei Wang and Zhongyuan Wang and Wanxiang Che and
                 Sendong Zhao and Ting Liu},
  title =        {Combining Self-supervised Learning and Active Learning
                 for Disfluency Detection},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {3},
  pages =        {62:1--62:25},
  month =        may,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3487290},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Apr 5 06:29:03 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3487290},
  abstract =     {Spoken language is fundamentally different from the
                 written language in that it contains frequent
                 disfluencies or parts of an utterance that are
                 corrected by the speaker. Disfluency detection
                 (removing these disfluencies) is desirable to clean the
                 input \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {62},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Warjri:2022:PSP,
  author =       {Sunita Warjri and Partha Pakray and Saralin A. Lyngdoh
                 and Arnab Kumar Maji},
  title =        {Part-of-Speech {(POS)} Tagging Using Deep
                 Learning-Based Approaches on the Designed {Khasi} {POS}
                 Corpus},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {3},
  pages =        {63:1--63:24},
  month =        may,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3488381},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Tue Apr 5 06:29:03 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3488381},
  abstract =     {Part-of-speech (POS) tagging is one of the research
                 challenging fields in natural language processing
                 (NLP). It requires good knowledge of a particular
                 language with large amounts of data or corpora for
                 feature engineering, which can lead to achieving a
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {63},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Hastuti:2022:QLS,
  author =       {Rochana Prih Hastuti and Yohanes Suyanto and Anny
                 Kartika Sari},
  title =        {{Q}-Learning for Shift-Reduce Parsing in {Indonesian}
                 Tree-{LSTM}-Based Text Generation},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {64:1--64:15},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3490501},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3490501},
  abstract =     {Tree-LSTM algorithm accommodates tree structure
                 processing to extract information outside the linear
                 sequence pattern. The use of Tree-LSTM in text
                 generation problems requires the help of an external
                 parser at each generation iteration. Developing a good
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {64},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Lee:2022:CEB,
  author =       {Lung-Hao Lee and Jian-Hong Li and Liang-Chih Yu},
  title =        {{Chinese} {EmoBank}: Building Valence-Arousal
                 Resources for Dimensional Sentiment Analysis},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {65:1--65:18},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3489141},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3489141},
  abstract =     {An increasing amount of research has recently focused
                 on dimensional sentiment analysis that represents
                 affective states as continuous numerical values on
                 multiple dimensions, such as valence-arousal (VA)
                 space. Compared to the categorical approach that
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {65},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

%%% NB: {Yi} (the language/script name) is brace-protected in the title
%%% so that sentence-casing bibliography styles do not reduce it to ``yi''.
@Article{Chen:2022:DDG,
  author =       "Shanxiong Chen and Ye Yang and Xuxin Liu and Shiyu
                 Zhu",
  title =        "Dual Discriminator {GAN}: Restoring Ancient {Yi}
                 Characters",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "4",
  pages =        "66:1--66:23",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3490031",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed May 18 08:42:14 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490031",
  abstract =     "In China, the damage of ancient Yi books are serious.
                 Due to the lack of ancient Yi experts, the repairation
                 of ancient Yi books is progressing very slowly. The
                 artificial intelligence is successful in the field of
                 image and text, so it is feasible for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jana:2022:HDL,
  author =       {Abhik Jana and Gopalakrishnan Venkatesh and Seid Muhie
                 Yimam and Chris Biemann},
  title =        {Hypernymy Detection for Low-resource Languages: a
                 Study for {Hindi, Bengali, and Amharic}},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {67:1--67:21},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3490389},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3490389},
  abstract =     {Numerous attempts for hypernymy relation (e.g., dog
                 ``is-a'' animal) detection have been made for
                 resourceful languages like English, whereas efforts
                 made for low-resource languages are scarce primarily
                 due to lack of gold-standard datasets and suitable
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {67},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Mao:2022:LDM,
  author =       {Zhuoyuan Mao and Chenhui Chu and Sadao Kurohashi},
  title =        {Linguistically Driven Multi-Task Pre-Training for
                 Low-Resource Neural Machine Translation},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {68:1--68:29},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3491065},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3491065},
  abstract =     {In the present study, we propose novel
                 sequence-to-sequence pre-training objectives for
                 low-resource machine translation (NMT):
                 Japanese-specific sequence to sequence (JASS) for
                 language pairs involving Japanese as the source or
                 target language, and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {68},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Abderrahim:2022:AWS,
  author =       {Mohammed Alaeddine Abderrahim and Mohammed El-Amine
                 Abderrahim},
  title =        {{Arabic} Word Sense Disambiguation for Information
                 Retrieval},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {69:1--69:19},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3510451},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3510451},
  abstract =     {In the context of using semantic resources for
                 information retrieval, the relationship and distance
                 between concepts are considered important for word
                 sense disambiguation. In this article, we experiment
                 with Conceptual Density and Random Walk with graph
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {69},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Ma:2022:ERC,
  author =       {Hongchao Ma and Zhongqing Wang and Xiabing Zhou and
                 Guodong Zhou and Qinglei Zhou},
  title =        {Emotion Recognition with Conversational Generation
                 Transfer},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {70:1--70:17},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3494532},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3494532},
  abstract =     {Emotion recognition in conversation is one of the
                 essential tasks of natural language processing.
                 However, this task's annotation data is insufficient
                 since such data is hard to collect and annotate.
                 Meanwhile, there is large-scale data for conversational
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {70},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Wu:2022:CEE,
  author =       {Xiaohua Wu and Tengrui Wang and Youping Fan and
                 Fangjian Yu},
  title =        {{Chinese} Event Extraction via Graph Attention
                 Network},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {71:1--71:12},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3494533},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3494533},
  abstract =     {Event extraction plays an important role in natural
                 language processing (NLP) applications, including
                 question answering and information retrieval. Most of
                 the previous state-of-the-art methods were lack of
                 ability in capturing features in long range. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {71},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Cui:2022:IGD,
  author =       {Yiming Cui and Wanxiang Che and Ziqing Yang and Ting
                 Liu and Bing Qin and Shijin Wang and Guoping Hu},
  title =        {Interactive Gated Decoder for Machine Reading
                 Comprehension},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {72:1--72:19},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3501399},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3501399},
  abstract =     {Owing to the availability of various large-scale
                 Machine Reading Comprehension (MRC) datasets, building
                 an effective model to extract passage spans for
                 question answering has been well studied in previous
                 works. However, in reality, there are some \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {72},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Husain:2022:IEP,
  author =       {Fatemah Husain and Ozlem Uzuner},
  title =        {Investigating the Effect of Preprocessing {Arabic}
                 Text on Offensive Language and Hate Speech Detection},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {73:1--73:20},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3501398},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3501398},
  abstract =     {Preprocessing of input text can play a key role in
                 text classification by reducing dimensionality and
                 removing unnecessary content. This study aims to
                 investigate the impact of preprocessing on Arabic
                 offensive language classification. We explore six
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {73},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Gogoi:2022:LLR,
  author =       {Arjun Gogoi and Nomi Baruah},
  title =        {A Lemmatizer for Low-resource Languages: {WSD} and Its
                 Role in the {Assamese} Language},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {74:1--74:22},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3502157},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3502157},
  abstract =     {The morphological variations of highly inflected
                 languages that appear in a text impede the progress of
                 computer processing and root word determination tasks
                 while extracting an abstract. As a remedy to this
                 difficulty, a lemmatization algorithm is \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {74},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Harrag:2022:AFN,
  author =       {Fouzi Harrag and Mohamed Khalil Djahli},
  title =        {{Arabic} Fake News Detection: a Fact Checking Based
                 Deep Learning Approach},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {75:1--75:34},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3501401},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3501401},
  abstract =     {Fake news stories can polarize society, particularly
                 during political events. They undermine confidence in
                 the media in general. Current NLP systems are still
                 lacking the ability to properly interpret and classify
                 Arabic fake news. Given the high stakes \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {75},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{P:2022:TSS,
  author =       {Jasir M. P. and Kannan Balakrishnan},
  title =        {Text-to-Speech Synthesis: Literature Review with an
                 Emphasis on {Malayalam} Language},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {76:1--76:56},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3501397},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3501397},
  abstract =     {Text-to-Speech Synthesis (TTS) is an active area of
                 research to generate synthetic speech from underlying
                 text. The identified syllables are uttered with proper
                 duration and prosody characteristics to emulate natural
                 speech. It falls under the category of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {76},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Qin:2022:MDS,
  author =       {Libo Qin and Fuxuan Wei and Minheng Ni and Yue Zhang
                 and Wanxiang Che and Yangming Li and Ting Liu},
  title =        {Multi-domain Spoken Language Understanding Using
                 Domain- and Task-aware Parameterization},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {77:1--77:17},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3502198},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3502198},
  abstract =     {Spoken language understanding (SLU) has been addressed
                 as a supervised learning problem, where a set of
                 training data is available for each domain. However,
                 annotating data for a new domain can be both
                 financially costly and non-scalable. One existing
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {77},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Qin:2022:ACE,
  author =       {Yanxia Qin and Zhongqing Wang and Yue Zhang and Kehai
                 Chen and Min Zhang},
  title =        {Advancing {Chinese} Event Detection via Revisiting
                 Character Information},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {78:1--78:9},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3502197},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3502197},
  abstract =     {Recently, character information has been successfully
                 introduced into the encoder-decoder event detection
                 model to relieve the trigger-word mismatch problem,
                 thus achieving impressive results in the languages
                 without natural delimiters (i.e., Chinese). \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {78},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Jain:2022:WSD,
  author =       {Goonjan Jain and D. K. Lobiyal},
  title =        {Word Sense Disambiguation using Cooperative Game
                 Theory and Fuzzy {Hindi} {WordNet} based on
                 {ConceptNet}},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {79:1--79:25},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3502739},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3502739},
  abstract =     {Natural Language is fuzzy in nature. The fuzziness of
                 Hindi language was captured in the Fuzzy Hindi WordNet
                 (FHWN). FHWN assigned membership values to fuzzy
                 relationships by consulting experts from various
                 domains. However, these membership values need
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {79},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Manerkar:2022:KWC,
  author =       {Sanjana Manerkar and Kavita Asnani and Preeti
                 Ravindranath Khorjuvenkar and Shilpa Desai and Jyoti D.
                 Pawar},
  title =        {{Konkani WordNet}: Corpus-Based Enhancement using
                 Crowdsourcing},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {80:1--80:18},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3503156},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3503156},
  abstract =     {Konkani is one of the languages included in the eighth
                 schedule of the Indian constitution. It is the official
                 language of Goa and is spoken mainly in Goa and some
                 places in Karnataka and Kerala. Konkani WordNet or
                 Konkani Shabdamalem (komkani 'sabdamalem) \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {80},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

@Article{Chen:2022:MMR,
  author =       {Junyi Chen and Lan Du and Ming Liu and Xiabing Zhou},
  title =        {{Mulan}: a Multiple Residual Article-Wise Attention
                 Network for Legal Judgment Prediction},
  journal =      j-TALLIP,
  volume =       {21},
  number =       {4},
  pages =        {81:1--81:15},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3503157},
  ISSN =         {2375-4699 (print), 2375-4702 (electronic)},
  ISSN-L =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3503157},
  abstract =     {Legal judgment prediction (LJP) is used to predict
                 judgment results based on the description of individual
                 legal cases. In order to be more suitable for actual
                 application scenarios in which the case has cited
                 multiple articles and has multiple charges, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    {81},
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-URL =  {https://dl.acm.org/loi/tallip},
}

%%% NB: the full name {New Tai Lue} is brace-protected in the title so
%%% that sentence-casing bibliography styles do not lowercase ``New''.
@Article{Guo:2022:HNT,
  author =       "H. Guo and N. Dong and J. Y. Zhao and Y. F. Liu",
  title =        "Handwritten {New Tai Lue} Character Recognition Using
                 Convolutional Prior Features and Deep Variationally
                 Sparse {Gaussian} Process Modeling",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "4",
  pages =        "82:1--82:25",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3506700",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Wed May 18 08:42:14 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3506700",
  abstract =     "New Tai Lue is widely used in Southwest China and
                 Southeast Asia. Hence, it is important to study related
                 handwritten character recognition. Considering the many
                 similar characters in handwritten New Tai Lue, this
                 paper proposes an offline handwritten New \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mahajan:2022:WLS,
  author =       {Shilpa Mahajan and Rajneesh Rani},
  title =        {Word Level Script Identification Using Convolutional
                 Neural Network Enhancement for Scenic Images},
  journal =      j-TALLIP,
  volume =       21,
  number =       4,
  pages =        {83:1--83:29},
  month =        jul,
  year =         2022,
  coden =        {????},
  doi =          {https://doi.org/10.1145/3506699},
  issn =         {2375-4699 (print), 2375-4702 (electronic)},
  issn-l =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url =          {https://dl.acm.org/doi/10.1145/3506699},
  abstract =     {Script identification from complex and colorful images
                 is an integral part of the text recognition and
                 classification system. Such images may contain twofold
                 challenges: (1) Challenges related to the camera like
                 blurring effect, non-uniform illumination \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    83,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-url =  {https://dl.acm.org/loi/tallip},
}

@Article{Alshammari:2022:CNS,
  author =       {Nasser O. Alshammari and Fawaz D. Alharbi},
  title =        {Combining a Novel Scoring Approach with {Arabic}
                 Stemming Techniques for {Arabic} Chatbots Conversation
                 Engine},
  journal =      j-TALLIP,
  volume =       21,
  number =       4,
  pages =        {84:1--84:21},
  month =        jul,
  year =         2022,
  coden =        {????},
  doi =          {https://doi.org/10.1145/3511215},
  issn =         {2375-4699 (print), 2375-4702 (electronic)},
  issn-l =       {2375-4699},
  bibdate =      {Wed May 18 08:42:14 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url =          {https://dl.acm.org/doi/10.1145/3511215},
  abstract =     {Arabic is recognized as one of the main languages
                 around the world. Many attempts and efforts have been
                 done to provide computing solutions to support the
                 language. Developing Arabic chatbots is still an
                 evolving research field and requires extra efforts
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    84,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-url =  {https://dl.acm.org/loi/tallip},
}

@Article{Shang:2022:IHD,
  author =       {Rui Shang and Xia Li},
  title =        {Improved Heuristic Data Management and Protection
                 Algorithm for Digital {China} Cultural Datasets},
  journal =      j-TALLIP,
  volume =       21,
  number =       5,
  pages =        {85:1--85:??},
  month =        sep,
  year =         2022,
  coden =        {????},
  doi =          {https://doi.org/10.1145/3394114},
  issn =         {2375-4699 (print), 2375-4702 (electronic)},
  issn-l =       {2375-4699},
  bibdate =      {Fri Mar 17 07:33:39 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url =          {https://dl.acm.org/doi/10.1145/3394114},
  abstract =     {In the present scenario sustainable management and
                 protection of digital cultural datasets are considered
                 as a significant area of research. In the recent past,
                 the protection and management of cultural data are
                 facing several new challenges and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    85,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-url =  {https://dl.acm.org/loi/tallip},
}

@Article{Canhasi:2022:AFN,
  author =       {Ercan Canhasi and Rexhep Shijaku and Erblin Berisha},
  title =        {{Albanian} Fake News Detection},
  journal =      j-TALLIP,
  volume =       21,
  number =       5,
  pages =        {86:1--86:??},
  month =        sep,
  year =         2022,
  coden =        {????},
  doi =          {https://doi.org/10.1145/3487288},
  issn =         {2375-4699 (print), 2375-4702 (electronic)},
  issn-l =       {2375-4699},
  bibdate =      {Fri Mar 17 07:33:39 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tallip.bib},
  url =          {https://dl.acm.org/doi/10.1145/3487288},
  abstract =     {Recent years have witnessed the vast increase of the
                 phenomenon known as the fake news. Among the main
                 reasons for this increase are the continuous growth of
                 internet and social media usage and the real-time
                 information dissemination opportunity offered
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  articleno =    86,
  fjournal =     {ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)},
  journal-url =  {https://dl.acm.org/loi/tallip},
}

@Article{Ahmed:2022:FCS,
  author =       "Usman Ahmed and Jerry Chun-Wei Lin and Gautam
                 Srivastava",
  title =        "Fuzzy Contrast Set Based Deep Attention Network for
                 Lexical Analysis and Mental Health Treatment",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "5",
  pages =        "87:1--87:??",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "ht