Entry Nguyen:2009:WSC from talip.bib

Last update: Sun Oct 15 02:55:04 MDT 2017                Valid HTML 3.2!

Index sections

Top | Symbols | Numbers | Math | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z

BibTeX entry

@Article{Nguyen:2009:WSC,
  author =       "Cam-Tu Nguyen and Xuan-Hieu Phan and Susumu Horiguchi
                 and Thu-Trang Nguyen and Quang-Thuy Ha",
  title =        "{Web} Search Clustering and Labeling with Hidden
                 Topics",
  journal =      j-TALIP,
  volume =       "8",
  number =       "3",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1568292.1568295",
  ISSN =         "1530-0226 (print), 1558-3430 (electronic)",
  ISSN-L =       "1530-0226",
  bibdate =      "Mon Mar 29 15:37:08 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/talip.bib",
  abstract =     "Web search clustering is a solution to reorganize
                 search results (also called ``snippets'') in a more
                 convenient way for browsing. There are three key
                 requirements for such post-retrieval clustering
                 systems: (1) the clustering algorithm should group
                 similar documents together; (2) clusters should be
                 labeled with descriptive phrases; and (3) the
                 clustering system should provide high-quality
                 clustering without downloading the whole Web
                 page.\par

                 This article introduces a novel framework for
                 clustering Web search results in Vietnamese which
                 targets the three above issues. The main motivation is
                 that by enriching short snippets with hidden topics
                 from huge resources of documents on the Internet, it is
                 able to cluster and label such snippets effectively in
                 a topic-oriented manner without concerning whole Web
                 pages. Our approach is based on recent successful topic
                 analysis models, such as Probabilistic-Latent Semantic
                 Analysis, or Latent Dirichlet Allocation. The
                 underlying idea of the framework is that we collect a
                 very large external data collection called ``universal
                 dataset,'' and then build a clustering system on both
                 the original snippets and a rich set of hidden topics
                 discovered from the universal data collection. This can
                 be seen as a richer representation of snippets to be
                 clustered. We carry out careful evaluation of our
                 method and show that our method can yield impressive
                 clustering quality.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Asian Language Information
                 Processing",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J820",
  keywords =     "cluster labeling; collocation; hidden topics analysis;
                 Hierarchical Agglomerative Clustering; Latent Dirichlet
                 allocation; Vietnamese; Web search clustering",
}

Related entries