%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.72",
%%%     date            = "31 October 2024",
%%%     time            = "10:51:04 MDT",
%%%     filename        = "tomccap.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "64171 57585 287984 2790223",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; ACM Transactions on
%%%                        Multimedia Computing, Communications, and
%%%                        Applications; TOMCCAP; TOMM",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Multimedia Computing,
%%%                        Communications, and Applications (CODEN
%%%                        ????, ISSN 1551-6857), completely covering
%%%                        all issues from volume 1, number 1, February
%%%                        2005 to date.
%%%
%%%                        NB: On 23-May-2014, the journal acronym was
%%%                        changed by ACM from TOMCCAP to TOMM, but the
%%%                        full journal name remains unchanged, and
%%%                        volume / number / pages values are not
%%%                        affected by the change.  The BibTeX journal
%%%                        abbreviation has therefore changed at volume
%%%                        10, number 4, June 2014, from j-TOMCCAP to
%%%                        j-TOMM.  The filename remains tomccap.bib; no
%%%                        tomm.bib exists at the master archive site.
%%%
%%%                        The ACM maintains World Wide Web pages with
%%%                        journal tables of contents for 2005--date at
%%%
%%%                            http://www.acm.org/tomccap/
%%%                            http://www.acm.org/pubs/contents/journals/tomccap/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J961
%%%
%%%                        That data has been automatically converted to
%%%                        BibTeX form, corrected for spelling and page
%%%                        number errors, and merged into this file.
%%%
%%%                        At version 1.72, the COMPLETE year coverage
%%%                        looks like this:
%%%
%%%                             2005 (  20)    2012 (  56)    2019 (  62)
%%%                             2006 (  18)    2013 (  62)    2020 ( 122)
%%%                             2007 (  27)    2014 (  55)    2021 ( 168)
%%%                             2008 (  45)    2015 (  51)    2022 ( 153)
%%%                             2009 (  14)    2016 (  31)    2023 ( 209)
%%%                             2010 (  31)    2017 (  63)    2024 ( 252)
%%%                             2011 (  41)    2018 (  75)
%%%
%%%                             Article:       1555
%%%
%%%                             Total entries: 1555
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the companion
%%%                        file with extension .sok.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed by the
%%%                        author for the BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted
%%%                        by journal, and then by publication order,
%%%                        with the help of ``bibsort -byvolume''.  The
%%%                        bibsort utility is available from
%%%                        ftp://ftp.math.utah.edu/pub/tex/bib.
%%%
%%%                        The author will be grateful for reports of
%%%                        errors of any kind in this bibliography.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================
%%% Preamble: LaTeX code that BibTeX copies ahead of the bibliography.
%%% It inputs bibnames.sty, supplies a fallback definition of \Thorn
%%% (a plain "T") when that macro is undefined, and declares an empty
%%% hyphenation-exception list.
@Preamble{"\input bibnames.sty" #
   "\ifx \Thorn \undefined \def \Thorn {T}\fi" #
   "\hyphenation{
   }"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
%%% Maintainer contact details, attached to entries through their
%%% acknowledgement field.  Quote delimiters are kept here because the
%%% value contains at-signs inside the \path|...| arguments.
@string{ack-nhfb =
   "Nelson H. F. Beebe, University of Utah,
    Department of Mathematics, 110 LCB, 155 S 1400 E RM 233,
    Salt Lake City, UT 84112-0090, USA,
    Tel: +1 801 581 5254, FAX: +1 801 581 4148,
    e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|,
    \path|beebe@computer.org| (Internet),
    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% From the ACM Portal Web site: ``On 23rd May 2014, ACM TOMCCAP
%%% changed its acronym to ACM TOMM. This acronym change was the result
%%% of extensive discussions between the journal Editorial Board and
%%% SIGMM constituents dating back to 2011. This name change emphasizes
%%% the continued strong collaboration with the ACM Multimedia
%%% conference (ACMMM).''
%%%
%%% Journal abbreviations:
%%% Journal-name macro for the TOMCCAP acronym.
@string{j-TOMCCAP = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications}}

%%% Journal-name macro for the TOMM acronym; the full journal name is
%%% the same as for j-TOMCCAP.
@string{j-TOMM = {ACM Transactions on Multimedia Computing,
                  Communications, and Applications}}

%%% ====================================================================
%%% Bibliography entries sorted in publication order:
%%% Volume 1, number 1 (February 2005), pages 1--2: editorial.
@article{Georganas:2005:EBA,
  author          = {Nicolas D. Georganas},
  title           = {{Editorial}: {The} birth of the {ACM Transactions
                     on Multimedia Computing, Communications and
                     Applications} {(TOMCCAP)}},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 3--13.
@article{Rowe:2005:ASR,
  author          = {Lawrence A. Rowe and Ramesh Jain},
  title           = {{ACM SIGMM Retreat} report on future directions
                     in multimedia research},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {3--13},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 14--15: guest editorial.
@article{Jain:2005:GEI,
  author          = {Ramesh Jain and Thomas Plagemann and Ralf Steinmetz},
  title           = {Guest editorial: {The International ACM
                     Multimedia Conference 1993} --- ten years after},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {14--15},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 16--36.
@article{Teodosio:2005:SS,
  author          = {Laura Teodosio and Walter Bender},
  title           = {Salient stills},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {16--36},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 37--59.
@article{Reddy:2005:DSM,
  author          = {A. L. N. Reddy and Jim Wyllie and
                     K. B. R. Wijayaratne},
  title           = {Disk scheduling in a multimedia {I/O} system},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {37--59},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 60--88.
@article{Buchanan:2005:ATL,
  author          = {M. Cecelia Buchanan and Polle T. Zellweger},
  title           = {Automatic temporal layout mechanisms revisited},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {60--88},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 89--109.
@article{Bulterman:2005:SMA,
  author          = {Dick C. A. Bulterman and Lynda Hardman},
  title           = {Structured multimedia authoring},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {89--109},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 1 (February 2005), pages 110--125.
@article{Mayer-Patel:2005:BSM,
  author          = {Ketan Mayer-Patel and Brian C. Smith and
                     Lawrence A. Rowe},
  title           = {The {Berkeley} software {MPEG-1} video decoder},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {1},
  pages           = {110--125},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 2 (May 2005), page 127 only.
@article{Plagemann:2005:SPA,
  author          = {Thomas Plagemann and Prashant Shenoy and
                     John R. Smith},
  title           = {Selected papers from the {ACM Multimedia
                     Conference 2003}},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {2},
  pages           = {127--127},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 2 (May 2005), pages 128--150.
@article{Kum:2005:RTM,
  author          = {Sang-Uok Kum and Ketan Mayer-Patel},
  title           = {Real-time multidepth stream compression},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {2},
  pages           = {128--150},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 2 (May 2005), pages 151--167.
%%% The last author's family name is the two-word ``Le Baillif''; it is
%%% braced (like ``{Del Bimbo}'' elsewhere in this file) so that BibTeX
%%% does not parse the capitalized ``Le'' as part of the given name.
@Article{Feng:2005:PSL,
  author =       "Wu-Chi Feng and Ed Kaiser and Wu Chang Feng and Mikael
                 {Le Baillif}",
  title =        "{Panoptes}: scalable low-power video sensor networking
                 technologies",
  journal =      j-TOMCCAP,
  volume =       "1",
  number =       "2",
  pages =        "151--167",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Thu Jul 7 13:52:13 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% Volume 1, number 2 (May 2005), pages 168--189.
@article{Goh:2005:SFD,
  author          = {Kingshy Goh and Beitao Li and Edward Y. Chang},
  title           = {Semantics and feature discovery via
                     confidence-based ensemble},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {2},
  pages           = {168--189},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 2 (May 2005), pages 190--210.
@article{Baker:2005:UPC,
  author          = {H. Harlyn Baker and Nina Bhatti and
                     Donald Tanguay and Irwin Sobel and Dan Gelb and
                     Michael E. Goss and W. Bruce Culbertson and
                     Thomas Malzbender},
  title           = {Understanding performance in {Coliseum}, an
                     immersive videoconferencing system},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {2},
  pages           = {190--210},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 3 (August 2005), pages 211--247.
@article{Adams:2005:IIM,
  author          = {Brett Adams and Svetha Venkatesh and Ramesh Jain},
  title           = {{IMCE}: {Integrated} media creation environment},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {3},
  pages           = {211--247},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Fri Nov 18 08:30:19 MST 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 3 (August 2005), pages 248--268.
@article{Poellabauer:2005:FCD,
  author          = {Christian Poellabauer and Karsten Schwan},
  title           = {Flexible cross-domain event delivery for
                     quality-managed multimedia applications},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {3},
  pages           = {248--268},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Fri Nov 18 08:30:19 MST 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 3 (August 2005), pages 269--288.
@article{Cooper:2005:TEC,
  author          = {Matthew Cooper and Jonathan Foote and
                     Andreas Girgensohn and Lynn Wilcox},
  title           = {Temporal event clustering for digital photo
                     collections},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {3},
  pages           = {269--288},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Fri Nov 18 08:30:19 MST 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 3 (August 2005), pages 289--314.
@article{Li:2005:CEM,
  author          = {Keqiu Li and Hong Shen},
  title           = {Coordinated enroute multimedia object caching in
                     transcoding proxies for tree networks},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {3},
  pages           = {289--314},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Fri Nov 18 08:30:19 MST 2005},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 4 (November 2005), pages 315--337.
@article{Wu:2005:AFE,
  author          = {Huahui Wu and Mark Claypool and Robert Kinicki},
  title           = {Adjusting forward error correction with temporal
                     scaling for {TCP}-friendly streaming {MPEG}},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {4},
  pages           = {315--337},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 4 (November 2005), pages 338--353.
@article{Cai:2005:LUL,
  author          = {Jianfei Cai and Xiangjun Li and Chang Wen Chen},
  title           = {Layered unequal loss protection with
                     pre-interleaving for fast progressive image
                     transmission over packet-loss channels},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {4},
  pages           = {338--353},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 1, number 4 (November 2005), pages 354--376.
@article{Tu:2005:ASP,
  author          = {Yi-Cheng Tu and Jianzhong Sun and
                     Mohamed Hefeeda and Sunil Prabhakar},
  title           = {An analytical study of peer-to-peer media
                     streaming systems},
  journal         = j-TOMCCAP,
  volume          = {1},
  number          = {4},
  pages           = {354--376},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 1 (February 2006), pages 1--19.
@article{Lew:2006:CBM,
  author          = {Michael S. Lew and Nicu Sebe and
                     Chabane Djeraba and Ramesh Jain},
  title           = {Content-based multimedia information retrieval:
                     {State} of the art and challenges},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {1},
  pages           = {1--19},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 1 (February 2006), pages 20--43.
@article{DelBimbo:2006:CBR,
  author          = {Alberto {Del Bimbo} and Pietro Pala},
  title           = {Content-based retrieval of {$3$D} models},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {1},
  pages           = {20--43},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 1 (February 2006), pages 44--67.
@article{Xu:2006:FAF,
  author          = {Huaxin Xu and Tat-Seng Chua},
  title           = {Fusion of {AV} features and external information
                     sources for event detection in team sports
                     video},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {1},
  pages           = {44--67},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 1 (February 2006), pages 68--89.
@article{Joshi:2006:SPE,
  author          = {Dhiraj Joshi and James Z. Wang and Jia Li},
  title           = {The {Story Picturing Engine}---a system for
                     automatic text illustration},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {1},
  pages           = {68--89},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 2 (May 2006), pages 91--108.
@article{Snoek:2006:LRS,
  author          = {Cees G. M. Snoek and Marcel Worring and
                     Alexander G. Hauptmann},
  title           = {Learning rich semantics from news video archives
                     by style analysis},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {2},
  pages           = {91--108},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 2 (May 2006), pages 109--126.
@article{Yang:2006:SER,
  author          = {Guang Yang and Tony Sun and Mario Gerla and
                     M. Y. Sanadidi and Ling-Jyh Chen},
  title           = {Smooth and efficient real-time video transport
                     in the presence of wireless errors},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {2},
  pages           = {109--126},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Volume 2, number 2 (May 2006), pages 127--148.
@article{Shao:2006:ASM,
  author          = {Xi Shao and Changsheng Xu and
                     Namunu C. Maddage and Qi Tian and
                     Mohan S. Kankanhalli and Jesse S. Jin},
  title           = {Automatic summarization of music videos},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {2},
  pages           = {127--148},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Eide:2006:RTV,
  author          = {Viktor S. Wold Eide and Ole-Christoffer Granmo and
                     Frank Eliassen and J{\o}rgen Andreas Michaelsen},
  title           = {Real-time video content analysis: {QoS}-aware
                     application composition and parallel processing},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {2},
  pages           = {149--172},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Candan:2006:ISI,
  author          = {K. Sel{\c{c}}uk Candan and Augusto Celentano and
                     Wolfgang Klas},
  title           = {Introduction to special issue on the use of context in
                     multimedia information systems},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {3},
  pages           = {173--176},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Ferrara:2006:SWO,
  author          = {Alfio Ferrara and Luca A. Ludovico and Stefano
                     Montanelli and Silvana Castano and Goffredo Haus},
  title           = {A {Semantic Web} ontology for context-based
                     classification and retrieval of music resources},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {3},
  pages           = {177--198},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Arigon:2006:HMP,
  author          = {Anne-Muriel Arigon and Anne Tchounikine and Maryvonne
                     Miquel},
  title           = {Handling multiple points of view in a multimedia data
                     warehouse},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {3},
  pages           = {199--218},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Kahol:2006:MCH,
  author =       "Kanav Kahol and Priyamvada Tripathi and Troy McDaniel
                 and Laura Bratton and Sethuraman Panchanathan",
  title =        "Modeling context in haptic perception, rendering, and
                 visualization",
  journal =      j-TOMCCAP,
  volume =       "2",
  number =       "3",
  pages =        "219--240",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Thu Sep 7 16:13:26 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Gulliver:2006:DUP,
  author          = {Stephen R. Gulliver and Gheorghita Ghinea},
  title           = {Defining user perception of distributed multimedia
                     quality},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {241--257},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Gopalan:2006:SAC,
  author          = {Kartik Gopalan and Lan Huang and Gang Peng and
                     Tzi-Cker Chiueh and Yow-Jian Lin},
  title           = {Statistical admission control using delay distribution
                     measurements},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {258--281},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Li:2006:MSP,
  author          = {H. Li and M. Li and B. Prabhakaran},
  title           = {Middleware for streaming {$3$D} progressive meshes
                     over lossy networks},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {282--317},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Etsion:2006:PPU,
  author          = {Yoav Etsion and Dan Tsafrir and Dror G. Feitelson},
  title           = {Process prioritization using output production:
                     {Scheduling} for multimedia},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {318--342},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Cesar:2006:GAH,
  author          = {Pablo Cesar and Petri Vuorimaa and Juha Vierinen},
  title           = {A graphics architecture for high-end interactive
                     television terminals},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {343--357},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Madhwacharyula:2006:MHV,
  author          = {Chitra L. Madhwacharyula and Marc Davis and Philippe
                     Mulhem and Mohan S. Kankanhalli},
  title           = {Metadata handling: a video perspective},
  journal         = j-TOMCCAP,
  volume          = {2},
  number          = {4},
  pages           = {358--388},
  month           = nov,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Atrey:2007:GOO,
  author =       "Pradeep K. Atrey and Mohan S. Kankanhalli and John B.
                 Oommen",
  title =        "Goal-oriented optimal subset selection of correlated
                 multimedia streams",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chen:2007:DSI,
  author =       "Datong Chen and Jie Yang and Robert Malkin and Howard
                 D. Wactlar",
  title =        "Detecting social interactions of the elderly in a
                 nursing home environment",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Heck:2007:VV,
  author =       "Rachel Heck and Michael Wallick and Michael Gleicher",
  title =        "Virtual videography",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Truong:2007:VAS,
  author =       "Ba Tu Truong and Svetha Venkatesh",
  title =        "Video abstraction: a systematic review and
                 classification",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Xu:2007:CAD,
  author =       "Changsheng Xu and Namunu C. Maddage and Xi Shao and Qi
                 Tian",
  title =        "Content-adaptive digital music watermarking based on
                 music structure analysis",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Yan:2007:MSO,
  author =       "Wei-Qi Yan and Mohan S. Kankanhalli",
  title =        "Multimedia simplification for optimized {MMS}
                 synthesis",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Apr 14 11:19:17 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Liu:2007:CAT,
  author          = {Tiecheng Liu and John R. Kender},
  title           = {Computational approaches to temporal sampling of video
                     sequences},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {7:1--7:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230813},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Video key frame extraction is one of the most
                     important research problems for video summarization,
                     indexing, and retrieval. For a variety of applications
                     such as ubiquitous media access and video streaming,
                     the temporal boundaries between video key frames are
                     required for synchronizing visual content with audio.
                     In this article, we define temporal video sampling as a
                     unified process of extracting video key frames and
                     computing their temporal boundaries, and formulate it
                     as an optimization problem. We first provide an optimal
                     approach that minimizes temporal video sampling error
                     using a dynamic programming process. The optimal
                     approach retrieves a key frame hierarchy and all
                     temporal boundaries in $ O(n^4) $ time and $ O(n^2) $
                     space. To further reduce computational complexity, we
                     also provide a suboptimal greedy algorithm that
                     exploits the data structure of a binary heap and uses a
                     novel ``look-ahead'' computational technique, enabling
                     all levels of key frames to be extracted with an
                     average-case computational time of $ O(n \log n) $ and
                     memory usage of $ O(n) $. Both the optimal and the
                     greedy methods are free of parameters, thus avoiding
                     the threshold-selection problem that exists in other
                     approaches. We empirically compare the proposed optimal
                     and greedy methods with several existing methods in
                     terms of video sampling error, computational cost, and
                     subjective quality. An evaluation of eight videos of
                     different genres shows that the greedy approach
                     achieves performance very close to that of the optimal
                     approach while drastically reducing computational cost,
                     making it suitable for processing long video sequences
                     in large video databases.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {7},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {key frame selection; temporal video sampling;
                     ubiquitous media access; video content analysis; video
                     summarization},
}

@Article{Moncrieff:2007:OAB,
  author          = {Simon Moncrieff and Svetha Venkatesh and Geoff West},
  title           = {Online audio background determination for complex
                     audio environments},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {8:1--8:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230814},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {We present a method for foreground/background
                     separation of audio using a background modelling
                     technique. The technique models the background in an
                     online, unsupervised, and adaptive fashion, and is
                     designed for application to long term surveillance and
                     monitoring problems. The background is determined using
                     a statistical method to model the states of the audio
                     over time. In addition, three methods are used to
                     increase the accuracy of background modelling in
                     complex audio environments. Such environments can cause
                     the failure of the statistical model to accurately
                     capture the background states. An entropy-based
                     approach is used to unify background representations
                     fragmented over multiple states of the statistical
                     model. The approach successfully unifies such
                     background states, resulting in a more robust
                     background model. We adaptively adjust the number of
                     states considered background according to background
                     complexity, resulting in the more accurate
                     classification of background models. Finally, we use an
                     auxiliary model cache to retain potential background
                     states in the system. This prevents the deletion of
                     such states due to a rapid influx of observed states
                     that can occur for highly dynamic sections of the audio
                     signal. The separation algorithm was successfully
                     applied to a number of audio environments representing
                     monitoring applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {8},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {audio analysis; online background modelling;
                     surveillance and monitoring},
}

@Article{Oshima:2007:PDS,
  author          = {Chika Oshima and Kazushi Nishimoto and Norihiro
                     Hagita},
  title           = {A piano duo support system for parents to lead
                     children to practice musical performances},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {9:1--9:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230815},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article, we propose ``Family Ensemble,'' a
                     piano duo support system for a musically inept parent
                     and his/her child who is a beginner at playing the
                     piano. The system makes it easier for parents to
                     correctly reproduce a given sequence of pitches along
                     with the child's performance by using score tracking
                     and note-replacement functions. The experiments with
                     this support system showed that the parents can
                     immediately participate in the piano duo. Furthermore,
                     we found that during joint practices using Family
                     Ensemble some subjects discussed musical ideas that
                     they would not have talked about without using the
                     system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {9},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {entertainment; musical expression; piano duo; score
                     tracking; support system},
}

@Article{He:2007:CSW,
  author          = {Xiaofei He and Deng Cai and Ji-Rong Wen and Wei-Ying
                     Ma and Hong-Jiang Zhang},
  title           = {Clustering and searching {WWW} images using link and
                     page layout analysis},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {10:1--10:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230816},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Due to the rapid growth of the number of digital
                     images on the Web, there is an increasing demand for an
                     effective and efficient method for organizing and
                     retrieving the available images. This article describes
                     iFind, a system for clustering and searching WWW
                     images. By using a vision-based page segmentation
                     algorithm, a Web page is partitioned into blocks, and
                     the textual and link information of an image can be
                     accurately extracted from the block containing that
                     image. The textual information is used for image
                     indexing. By extracting the page-to-block,
                     block-to-image, block-to-page relationships through
                     link structure and page layout analysis, we construct
                     an image graph. Our method is less sensitive to noisy
                     links than previous methods like PageRank, HITS, and
                     PicASHOW, and hence the image graph can better reflect
                     the semantic relationship between images. Using the
                     notion of Markov Chain, we can compute the limiting
                     probability distributions of the images, ImageRanks,
                     which characterize the importance of the images. The
                     ImageRanks are combined with the relevance scores to
                     produce the final ranking for image search. With the
                     graph models, we can also use techniques from spectral
                     graph theory for image clustering and embedding, or 2-D
                     visualization. Some experimental results on 11.6
                     million images downloaded from the Web are provided in
                     the article.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {image clustering; image search; link analysis; Web
                     mining},
}

@Article{Jung:2007:NBA,
  author          = {Byunghee Jung and Junehwa Song and Yoonjoon Lee},
  title           = {A narrative-based abstraction framework for
                     story-oriented video},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {11:1--11:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230817},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article proposes a novel video abstraction
                     framework for online review services of story-oriented
                     videos such as dramas. Among the many genres of TV
                     programs, a drama is one of the most popularly watched
                     on the Web. The abstracts generated by the proposed
                     framework not only give a summary of a video but also
                     effectively help viewers understand the overall story.
                     In addition, our method is duration-flexible. We get
                     clues about human understanding of a story from
                     scenario writing rules and editorial techniques that
                     are popularly used in the process of video production
                     to explicitly express a narrative, and propose a new
                     video abstraction model, called a Narrative Abstraction
                     Model. The model effectively captures the narrative
                     structure embedded in a story-oriented video and
                     articulates the progress of the story in a weighted
                     directed graph, called a Narrative Structure Graph
                     (NSG). The model provides a basis for a flexible
                     framework for abstract generation using the NSG as the
                     intermediary representation of a video. Different
                     abstracts can be appropriately generated based upon
                     different user requirements. To show the effectiveness
                     of the proposed model and method, we developed a video
                     abstraction system realizing the framework, and
                     successfully applied it to large volumes of TV dramas.
                     The evaluation results show that the proposed framework
                     is a feasible solution for online review services.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {film; narrative structure; online review services;
                     story understanding; story-oriented; video abstraction;
                     video abstraction system},
}

@Article{Shacham:2007:UDP,
  author          = {Ron Shacham and Henning Schulzrinne and Srisakul
                     Thakolsri and Wolfgang Kellerer},
  title           = {Ubiquitous device personalization and use: {The} next
                     generation of {IP} multimedia communications},
  journal         = j-TOMCCAP,
  volume          = {3},
  number          = {2},
  pages           = {12:1--12:??},
  month           = may,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1230812.1230818},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:10:04 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Service usage in emerging ubiquitous environments
                     includes seamless and personalized usage of public and
                     private devices discovered in the vicinity of a user.
                     In our work, we describe an architecture for device
                     discovery, device configuration, and the transfer of
                     active sessions between devices. The presented
                     architecture uses the Session Initiation Protocol (SIP)
                     as a standardized, widely used signaling protocol for
                     IP-based multimedia services. Our solution includes
                     support of simple existing devices, split of sessions
                     between devices, user-control of location-based
                     behavior, and handling of security and privacy
                     concerns. We present the implementation and show the
                     feasibility of our work with analytical evaluation and
                     measurements.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Internet multimedia; location-based services; mobile
                     communications; ubiquitous computing},
}

@Article{Chen:2007:EMO,
  author =       "Herng-Yow Chen and Sheng-Wei Li",
  title =        "Exploring many-to-one speech-to-text correlation for
                 {Web}-based language learning",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "3",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1236471.1236472",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:10:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article investigates the correlations between
                 multimedia objects (particularly speech and text)
                 involved in language lectures in order to design an
                 effective presentation mechanism for web-based
                 learning. The cross-media correlations are classified
                 into implicit relations (retrieved by computing) and
                 explicit relations (recorded during the preprocessing
                 stage). The implicit temporal correlation between
                 speech and text is primarily to help to negotiate
                 supplementary lecture navigations like tele-pointer
                 movement, lips-sync movement, and content scrolling. We
                 propose a speech-text alignment framework, using an
                 iterative algorithm based on local alignment, to probe
                 many-to-one temporal correlations, and not the
                 one-to-one only. The proposed framework is a more
                 practical method for analyzing general language
                 lectures, and the algorithm's time complexity conforms
                 to the best-possible computation cost, $O(nm)$, without
                 introducing additional computation. In addition, we
                 have shown the feasibility of creating vivid
                 presentations by exploiting implicit relations and
                 artificially simulating some explicit media. To
                 facilitate the navigation of integrated multimedia
                 documents, we develop several visualization techniques
                 for describing media correlations, including guidelines
                 for speech-text correlations, visible-automatic
                 scrolling, and levels of detail of timeline, to provide
                 intuitive and easy-to-use random access mechanisms. We
                 evaluated the performance of the analysis method and
                 human perceptions of the synchronized presentation. The
                 overall performance of the analysis method is that
                 about 99.5\% of the words analyzed are of a temporal
                 error within 0.5 sec and the subjective evaluation
                 result shows that the synchronized presentation is
                 highly acceptable to human beings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "analysis and presentation; computed synchronization;
                 cross-media correlation; lips sync; speech-to-text
                 alignment",
}

@Article{Wang:2007:EST,
  author =       {Surong Wang and Manoranjan Dash and Liang-Tien Chia
                 and Min Xu},
  title =        {Efficient sampling of training set in large and noisy
                 multimedia data},
  journal =      j-TOMCCAP,
  volume =       3,
  number =       3,
  pages =        {14:1--14:??},
  month =        aug,
  year =         2007,
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1236471.1236473},
  ISSN =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Mon Jun 16 17:10:32 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {As the amount of multimedia data is increasing
                 day-by-day thanks to less expensive storage devices and
                 increasing numbers of information sources, machine
                 learning algorithms are faced with large-sized and
                 noisy datasets. Fortunately, the use of a good sampling
                 set for training influences the final results
                 significantly. But using a simple random sample (SRS)
                 may not obtain satisfactory results because such a
                 sample may not adequately represent the large and noisy
                 dataset due to its blind approach in selecting samples.
                 The difficulty is particularly apparent for huge
                 datasets where, due to memory constraints, only very
                 small sample sizes are used. This is typically the case
                 for multimedia applications, where data size is usually
                 very large. In this article we propose a new and
                 efficient method to sample of large and noisy
                 multimedia data. The proposed method is based on a
                 simple distance measure that compares the histograms of
                 the sample set and the whole set in order to estimate
                 the representativeness of the sample. The proposed
                 method deals with noise in an elegant manner which SRS
                 and other methods are not able to deal with. We
                 experiment on image and audio datasets. Comparison with
                 SRS and other methods shows that the proposed method is
                 vastly superior in terms of sample representativeness,
                 particularly for small sample sizes although time-wise
                 it is comparable to SRS, the least expensive method in
                 terms of time.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    14,
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {audio event identification; histogram; image
                 classification; noise; sampling},
}

@Article{Zhou:2007:CCO,
  author =       {Suiping Zhou and Wentong Cai and Stephen J. Turner and
                 Bu-Sung Lee and Junhu Wei},
  title =        {Critical causal order of events in distributed virtual
                 environments},
  journal =      j-TOMCCAP,
  volume =       3,
  number =       3,
  pages =        {15:1--15:??},
  month =        aug,
  year =         2007,
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1236471.1236474},
  ISSN =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Mon Jun 16 17:10:32 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {We investigate the causal order of events in
                 distributed virtual environments (DVEs). We first
                 define the critical causal order relation among the
                 events. Then, we propose some mechanisms to enhance the
                 prevalent RO (receive order delivery) mechanism in DVEs
                 so that the real-time property of DVEs is preserved
                 while the critical causal order violations are reduced.
                 These mechanisms are implemented as a middleware.
                 Experimental results show that the middleware performs
                 well in reducing the critical causality violations in
                 simulation and incurs little processing overhead.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    15,
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {causal order; distributed simulation; virtual
                 environments},
}

@Article{Li:2007:SRM,
  author =       {Chuanjun Li and S. Q. Zheng and B. Prabhakaran},
  title =        {Segmentation and recognition of motion streams by
                 similarity search},
  journal =      j-TOMCCAP,
  volume =       3,
  number =       3,
  pages =        {16:1--16:??},
  month =        aug,
  year =         2007,
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1236471.1236475},
  ISSN =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Mon Jun 16 17:10:32 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Fast and accurate recognition of motion data streams
                 from gesture sensing and motion capture devices has
                 many applications and is the focus of this article.
                 Based on the analysis of the geometric structures
                 revealed by singular value decompositions (SVD) of
                 motion data, a similarity measure is proposed for
                 simultaneously segmenting and recognizing motion
                 streams. A direction identification approach is
                 explored to further differentiate motions with similar
                 data geometric structures. Experiments show that the
                 proposed similarity measure can segment and recognize
                 motion streams of variable lengths with high accuracy,
                 without knowing beforehand the number of motions in a
                 stream.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    16,
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {gesture recognition; motion capture; pattern analysis;
                 principal component analysis; segmentation; similarity
                 measures; singular value decomposition},
}

@Article{Ott:2007:OAT,
  author =       {David E. Ott and Ketan Mayer-Patel},
  title =        {An open architecture for transport-level protocol
                 coordination in distributed multimedia applications},
  journal =      j-TOMCCAP,
  volume =       3,
  number =       3,
  pages =        {17:1--17:??},
  month =        aug,
  year =         2007,
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1236471.1236476},
  ISSN =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Mon Jun 16 17:10:32 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {We consider the problem of flow coordination in
                 distributed multimedia applications. Most
                 transport-level protocols are designed to operate
                 independently and lack mechanisms for sharing
                 information with other flows and coordinating data
                 transport in various ways. This limitation becomes
                 problematic in distributed applications that employ
                 numerous flows between two computing clusters sharing
                 the same intermediary forwarding path across the
                 Internet. In this article, we propose an open
                 architecture that supports the sharing of network state
                 information, peer flow information, and
                 application-specific information. Called simply the
                 coordination protocol (CP), the scheme facilitates
                 coordination of network resource usage across flows
                 belonging to the same application, as well as aiding
                 other types of coordination. The effectiveness of our
                 approach is illustrated in the context of
                 multistreaming in 3D tele-immersion where consistency
                 of network information across flows both greatly
                 improves frame transport synchrony and minimizes
                 buffering delay.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    17,
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {distributed applications; flow coordination; network
                 protocols},
}

@Article{Sakr:2007:RCB,
  author =       {Ziad Sakr and Nicolas D. Georganas},
  title =        {Robust content-based {MPEG}-4 {XMT} scene structure
                 authentication and multimedia content location},
  journal =      j-TOMCCAP,
  volume =       3,
  number =       3,
  pages =        {18:1--18:??},
  month =        aug,
  year =         2007,
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1236471.1236477},
  ISSN =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Mon Jun 16 17:10:32 MDT 2008},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {For the past decade, there have been numerous research
                 works focusing on the protection of digital images,
                 audio, video, 3D virtual scenes, and software data from
                 unauthorized use and distribution. With the emerging
                 technology of the MPEG-4 standard, MPEG-4 scenes that
                 may include images, video, audio, and 3D objects can
                 easily be built using the text-based MPEG-4 XMT
                 standard. XMT allows content authors to exchange their
                 content with other authors, tools, or service providers
                 and facilitates interoperability with MPEG-4, X3D, and
                 SMIL. In order for owners and designers to protect
                 and/or authenticate their work, some form of security
                 needs to be applied into the MPEG-4 XMT structure and
                 its media content. Unlike images or videos,
                 watermarking an XMT structure is not an easy task,
                 since the structure contains no noise components to
                 embed the watermark. This article is the first one
                 proposing a novel robust algorithm for the
                 authentication of a given MPEG-4 XMT structured scene
                 and the location of its multimedia content.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    18,
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {MPEG-4; multimedia; polynomial; pseudorandom
                 sequences; steganography; VRML; watermarking; XML;
                 XMT},
}

@Article{Ghinea:2007:ISI,
  author =       "Gheorghita Ghinea and Chabane Djeraba and Stephen
                 Gulliver and Kara Pernice Coyne",
  title =        "Introduction to special issue on eye-tracking
                 applications in multimedia systems",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "1:1--1:4",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314304",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Colombo:2007:RTR,
  author =       "Carlo Colombo and Dario Comanducci and Alberto {Del
                 Bimbo}",
  title =        "Robust tracking and remapping of eye appearance with
                 passive computer vision",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "2:1--2:20",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314305",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A single-camera iris-tracking and remapping approach
                 based on passive computer vision is presented. Tracking
                 is aimed at obtaining accurate and robust measurements
                 of the iris/pupil position. To this purpose, a robust
                 method for ellipse fitting is used, employing search
                 constraints so as to achieve better performance with
                 respect to the standard RANSAC algorithm. Tracking also
                 embeds an iris localization algorithm (working as a
                 bootstrap multiple-hypotheses generation step), and a
                 blink detector that can detect voluntary eye blinks in
                 human-computer interaction applications. On-screen
                 remapping incorporates a head-tracking method capable
                 of compensating for small user-head movements. The
                 approach operates in real time under different light
                 conditions and in the presence of distractors. An
                 extensive set of experiments is presented and
                 discussed. In particular, an evaluation method for the
                 choice of layout of both hardware components and
                 calibration points is described. Experiments also
                 investigate the importance of providing a visual
                 feedback to the user, and the benefits gained from
                 performing head compensation, especially during
                 image-to-screen map calibration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "eye blink detection; eye tracking and remapping;
                 eye-driven human-computer interaction; robust fitting",
}

@Article{Wang:2007:UGP,
  author =       "Jun Wang and Lijun Yin and Jason Moore",
  title =        "Using geometric properties of topographic manifold to
                 detect and track eyes for human-computer interaction",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "3:1--3:20",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314306",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Automatic eye detection and tracking is an important
                 component for advanced human-computer interface design.
                 Accurate eye localization can help develop a successful
                 system for face recognition and emotion identification.
                 In this article, we propose a novel approach to detect
                 and track eyes using geometric surface features on
                 topographic manifold of eye images. First, in the joint
                 spatial-intensity domain, a facial image is treated as
                 a 3D terrain surface or image topographic manifold. In
                 particular, eye regions exhibit certain intrinsic
                 geometric traits on this topographic manifold, namely,
                 the pit-labeled center and hillside-like surround
                 regions. Applying a terrain classification procedure on
                 the topographic manifold of facial images, each
                 location of the manifold can be labeled to generate a
                 terrain map. We use the distribution of terrain labels
                 to represent the eye terrain pattern. The Bhattacharyya
                 affinity is employed to measure the distribution
                 similarity between two topographic manifolds. Based on
                 the Bhattacharyya kernel, a support vector machine is
                 applied for selecting proper eye pairs from the
                 pit-labeled candidates. Second, given detected eyes on
                 the first frame of a video sequence, a
                 mutual-information-based fitting function is defined to
                 describe the similarity between two terrain surfaces of
                 neighboring frames. By optimizing the fitting function,
                 eye locations are updated for subsequent frames. The
                 distinction of the proposed approach lies in that both
                 eye detection and eye tracking are performed on the
                 derived topographic manifold, rather than on an
                 original-intensity image domain. The robustness of the
                 approach is demonstrated under various imaging
                 conditions and with different facial appearances, using
                 both static images and video sequences without
                 background constraints.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Bhattacharyya affinity; eye detection; eye tracking;
                 mutual information; topographic manifold",
}

@Article{Agrafiotis:2007:TEC,
  author =       "D. Agrafiotis and S. J. C. Davies and N. Canagarajah
                 and D. R. Bull",
  title =        "Towards efficient context-specific video coding based
                 on gaze-tracking analysis",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "4:1--4:15",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314307",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article discusses a framework for model-based,
                 context-dependent video coding based on exploitation of
                 characteristics of the human visual system. The system
                 utilizes variable-quality coding based on priority maps
                 which are created using mostly context-dependent rules.
                 The technique is demonstrated through two case studies
                 of specific video context, namely open signed content
                 and football sequences. Eye-tracking analysis is
                 employed for identifying the characteristics of each
                 context, which are subsequently exploited for coding
                 purposes, either directly or through a gaze prediction
                 model. The framework is shown to achieve a considerable
                 improvement in coding efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "applications; context-based video coding; eye
                 tracking; multimedia perceptual quality; subjective
                 video quality; transformation of eye movements into
                 useful knowledge",
}

@Article{Urruty:2007:DEF,
  author =       "Thierry Urruty and Stanislas Lew and Nacim Ihadaddene
                 and Dan A. Simovici",
  title =        "Detecting eye fixations by projection clustering",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "5:1--5:20",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314308",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Eye movements are certainly the most natural and
                 repetitive movement of a human being. The most mundane
                 activity, such as watching television or reading a
                 newspaper, involves this automatic activity which
                 consists of shifting our gaze from one point to
                 another.\par

                 Identification of the components of eye movements
                 (fixations and saccades) is an essential part in the
                 analysis of visual behavior because these types of
                 movements provide the basic elements used by further
                 investigations of human vision.\par

                 However, many of the algorithms that detect fixations
                 present a number of problems. In this article, we
                 present a new fixation identification technique that is
                 based on clustering of eye positions, using projections
                 and projection aggregation applied to static pictures.
                 We also present a new method that computes dispersion
                 of eye fixations in videos considering a multiuser
                 environment.\par

                 To demonstrate the performance and usefulness of our
                 approach we discuss our experimental work with two
                 different applications: on fixed image and video.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "eye fixations; interaction modeling; projected
                 clustering; static pictures; videos",
}

@Article{Duchowski:2007:FGC,
  author =       "Andrew T. Duchowski and Arzu {\c{C}}{\"o}ltekin",
  title =        "Foveated gaze-contingent displays for peripheral {LOD}
                 management, {$3$D} visualization, and stereo imaging",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "6:1--6:18",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314309",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Advancements in graphics hardware have allowed
                 development of hardware-accelerated imaging displays.
                 This article reviews techniques for real-time
                 simulation of arbitrary visual fields over still images
                 and video. The goal is to provide the vision sciences
                 and perceptual graphics communities techniques for the
                 investigation of fundamental processes of visual
                 perception. Classic gaze-contingent displays used for
                 these purposes are reviewed and for the first time a
                 pixel shader is introduced for display of a
                 high-resolution window over peripherally degraded
                 stimulus. The pixel shader advances current
                 state-of-the-art by allowing real-time processing of
                 still or streamed images, obviating the need for
                 preprocessing or storage.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "eye tracking; foveation; gaze-contingent displays;
                 level-of-detail",
}

@Article{Loschky:2007:HLC,
  author =       "Lester C. Loschky and Gary S. Wolverton",
  title =        "How late can you update gaze-contingent
                 multiresolutional displays without detection?",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "7:1--7:10",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314310",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This study investigated perceptual disruptions in
                 gaze-contingent multiresolutional displays (GCMRDs) due
                 to delays in updating the center of highest resolution
                 after an eye movement. GCMRDs can be used to save
                 processing resources and transmission bandwidth in many
                 types of single-user display applications, such as
                 virtual reality, video-telephony, simulators, and
                 remote piloting. The current study found that image
                 update delays as late as 60 ms after an eye movement
                 did not significantly increase the detectability of
                 image blur and/or motion transients due to the update.
                 This is good news for designers of GCMRDs, since 60 ms
                 is ample time to update many GCMRDs after an eye
                 movement without disrupting perception. The study also
                 found that longer eye movements led to greater blur
                 and/or transient detection due to moving the eyes
                 further into the low-resolution periphery, effectively
                 reducing the image resolution at fixation prior to the
                 update. In GCMRD applications where longer saccades are
                 more likely (e.g., displays with relatively large
                 distances between objects), this problem could be
                 overcome by increasing the size of the region of
                 highest resolution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "area of interest; bandwidth; blur detection; contrast
                 thresholds; display updates; eye movements; eye
                 tracking; foveated; foveation; gaze-contingent;
                 level-of-detail; multiresolution; perceptual
                 compression; peripheral vision; saccades; saccadic
                 suppression; visual perception",
}

@Article{Murray:2007:AEG,
  author =       "Norman Murray and Dave Roberts and Anthony Steed and
                 Paul Sharkey and Paul Dickerson and John Rae",
  title =        "An assessment of eye-gaze potential within immersive
                 virtual environments",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "8:1--8:17",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314311",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In collaborative situations, eye gaze is a critical
                 element of behavior which supports and fulfills many
                 activities and roles. In current computer-supported
                 collaboration systems, eye gaze is poorly supported.
                 Even in a state-of-the-art video conferencing system
                 such as the access grid, although one can see the face
                 of the user, much of the communicative power of eye
                 gaze is lost. This article gives an overview of some
                 preliminary work that looks towards integrating eye
                 gaze into an immersive collaborative virtual
                 environment and assessing the impact that this would
                 have on interaction between the users of such a system.
                 Three experiments were conducted to assess the efficacy
                 of eye gaze within immersive virtual environments. In
                 each experiment, subjects observed on a large screen
                 the eye-gaze behavior of an avatar. The eye-gaze
                 behavior of that avatar had previously been recorded
                 from a user with the use of a head-mounted eye tracker.
                 The first experiment was conducted to assess the
                 difference between users' abilities to judge what
                 objects an avatar is looking at with only head gaze
                 being viewed and also with eye- and head-gaze data
                 being displayed. The results from the experiment show
                 that eye gaze is of vital importance to the subjects,
                 correctly identifying what a person is looking at in an
                 immersive virtual environment. The second experiment
                 examined whether a monocular or binocular eye-tracker
                 would be required. This was examined by testing
                 subjects' ability to identify where an avatar was
                 looking from their eye direction alone, or by eye
                 direction combined with convergence. This experiment
                 showed that convergence had a significant impact on the
                 subjects' ability to identify where the avatar was
                 looking. The final experiment looked at the effects of
                 stereo and mono-viewing of the scene, with the subjects
                 being asked to identify where the avatar was looking.
                 This experiment showed that there was no difference in
                 the subjects' ability to detect where the avatar was
                 gazing. This is followed by a description of how the
                 eye-tracking system has been integrated into an
                 immersive collaborative virtual environment and some
                 preliminary results from the use of such a system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "eye gaze; immersive virtual environments",
}

@Article{Rachovides:2007:CIM,
  author =       "Dorothy Rachovides and James Walkerdine and Peter
                 Phillips",
  title =        "The conductor interaction method",
  journal =      j-TOMCCAP,
  volume =       "3",
  number =       "4",
  pages =        "9:1--9:23",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1314303.1314312",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:11:20 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Computers have increasingly become part of our
                 everyday lives, with many activities either involving
                 their direct use or being supported by one. This has
                 prompted research into developing methods and
                 mechanisms to assist humans in interacting with
                 computers (human-computer interaction, or HCI). A
                 number of HCI techniques have been developed over the
                 years, some of which are quite old but continue to be
                 used, and some more recent and still evolving. Many of
                 these interaction techniques, however, are not natural
                 in their use and typically require the user to learn a
                 new means of interaction. Inconsistencies within these
                 techniques and the restrictions they impose on user
                 creativity can also make such interaction techniques
                 difficult to use, especially for novice users.\par

                 This article proposes an alternative interaction
                 method, the conductor interaction method (CIM), which
                 aims to provide a more natural and easier-to-learn
                 interaction technique. This novel interaction method
                 extends existing HCI methods by drawing upon techniques
                 found in human-human interaction. It is argued that the
                 use of a two-phased multimodal interaction mechanism,
                 using gaze for selection and gesture for manipulation,
                 incorporated within a metaphor-based environment, can
                 provide a viable alternative for interacting with a
                 computer (especially for novice users). Both the model
                 and an implementation of the CIM within a system are
                 presented in this article. This system formed the basis
                 of a number of user studies that have been performed to
                 assess the effectiveness of the CIM, the findings of
                 which are discussed in this work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "gaze- and gesture-based interfaces; human-computer
                 interaction",
}

@Article{Luo:2008:IFH,
  author =       "Hangzai Luo and Yuli Gao and Xiangyang Xue and Jinye
                 Peng and Jianping Fan",
  title =        "Incorporating feature hierarchy and boosting to
                 achieve more effective classifier training and
                 concept-oriented video summarization and skimming",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324288",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "For online medical education purposes, we have
                 developed a novel scheme to incorporate the results of
                 semantic video classification to select the most
                 representative video shots for generating
                 concept-oriented summarization and skimming of surgery
                 education videos. First, salient objects are used as
                 the video patterns for feature extraction to achieve a
                 good representation of the intermediate video
                 semantics. The salient objects are defined as the
                 salient video compounds that can be used to
                 characterize the most significant perceptual properties
                 of the corresponding real world physical objects in a
                 video, and thus the appearances of such salient objects
                 can be used to predict the appearances of the relevant
                 semantic video concepts in a specific video domain.
                 Second, a novel multi-modal boosting algorithm is
                 developed to achieve more reliable video classifier
                 training by incorporating feature hierarchy and
                 boosting to dramatically reduce both the training cost
                 and the size of training samples, thus it can
                 significantly speed up SVM (support vector machine)
                 classifier training. In addition, the unlabeled samples
                 are integrated to reduce the human efforts on labeling
                 large amount of training samples. Finally, the results
                 of semantic video classification are incorporated to
                 enable concept-oriented video summarization and
                 skimming. Experimental results in a specific domain of
                 surgery education videos are provided.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "concept-oriented video skimming; feature hierarchy;
                 multi-modal boosting; salient objects; semantic video
                 classification; unlabeled samples",
}

@Article{Hefeeda:2008:RDO,
  author =       "Mohamed Hefeeda and Cheng-Hsin Hsu",
  title =        "Rate-distortion optimized streaming of fine-grained
                 scalable video sequences",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324289",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We present optimal schemes for allocating bits of
                 fine-grained scalable video sequences among multiple
                 senders streaming to a single receiver. This allocation
                 problem is critical in optimizing the perceived quality
                 in peer-to-peer and distributed multi-server streaming
                 environments. Senders in such environments are
                 heterogeneous in their outgoing bandwidth and they hold
                 different portions of the video stream. We first
                 formulate and optimally solve the problem for
                 individual frames, then we generalize to the multiple
                 frame case. Specifically, we formulate the allocation
                 problem as an optimization problem, which is nonlinear
                 in general. We use rate-distortion models in the
                 formulation to achieve the minimum distortion in the
                 rendered video, constrained by the outgoing bandwidth
                 of senders, availability of video data at senders, and
                 incoming bandwidth of receiver. We show how the adopted
                 rate-distortion models transform the nonlinear problem
                 to an integer linear programming (ILP) problem. We then
                 design a simple rounding scheme that transforms the ILP
                 problem to a linear programming (LP) one, which can be
                 solved efficiently using common optimization techniques
                 such as the Simplex method. We prove that our rounding
                 scheme always produces a feasible solution, and the
                 solution is within a negligible margin from the optimal
                 solution. We also propose a new algorithm (FGSAssign)
                 for the single-frame allocation problem that runs in $
                 O(n \log n) $ steps, where $n$ is the number of senders.
                 We prove that FGSAssign is optimal. Furthermore, we
                 propose a heuristic algorithm (mFGSAssign) that
                 produces near-optimal solutions for the multiple-frame
                 case, and runs an order of magnitude faster than the
                 optimal one. Because of its short running time,
                 mFGSAssign can be used in real time. Our experimental
                 study validates our analytical analysis and shows the
                 effectiveness of our allocation algorithms in improving
                 the video quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "distributed streaming; FGS; fine-grained scalable
                 streaming; peer-to-peer streaming; rate-distortion
                 models; rate-distortion optimized streaming; video
                 streaming",
}

@Article{Babich:2008:VQE,
  author =       "Fulvio Babich and Marco D'Orlando and Francesca
                 Vatta",
  title =        "Video quality estimation in wireless {IP} networks:
                 {Algorithms} and applications",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324290",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article proposes three methods to estimate the
                 distortion deriving from packet losses in wireless
                 video communication. The proposed methods take into
                 account the short-term properties of the encoded video
                 sequences. A suitable set of functions is adopted to
                 model the distortion envelope resulting from multiple
                 losses. The estimated performance is compared with the
                 actual distortion, evaluated by decoding the received
                 sequence with a properly designed decoder. Numerical
                 results confirm the accuracy of the proposed models in
                 approximating the actual Mean Square Error (MSE) for a
                 wide range of loss rates. Some applications of the
                 proposed algorithms are presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "distortion estimation; error-concealment;
                 error-resilience; H.264; packet loss rate; real time
                 video; wireless networks",
}

@Article{Kotharu:2008:PQR,
  author =       "Phani S. Kotharu and B. Prabhakaran",
  title =        "Partial query resolution for animation authoring",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324291",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Animations are a part of multimedia and techniques
                 such as motion mapping and inverse kinematics aid in
                 reusing models and motion sequences to create new
                 animations. This reuse approach is facilitated by the
                 use of content-based retrieval techniques that often
                 require fuzzy query resolution. Most fuzzy query
                 resolution approaches work on all the attributes of the
                 query to minimize the database access cost thus
                 resulting in an unsatisfactory result set. It turns out
                 that the query resolution can be carried out in a
                 partial manner to achieve user satisfactory results and
                 aid in easy authoring. In this article, we present two
                 partial fuzzy query resolution approaches, one that
                 results in high-quality animations and the other that
                 produces results with decreasing number of satisfied
                 conditions in the query.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "aggregation function; animation toolkit; fuzzy query;
                 multimedia authoring; partial ordering; top-k query",
}

@Article{Ip:2008:RRS,
  author =       "Alan T. S. Ip and John C. S. Lui and Jiangchuan Liu",
  title =        "A revenue-rewarding scheme of providing incentive for
                 cooperative proxy caching for media streaming systems",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324292",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Network entities cooperating together can improve
                 system performance of media streaming. In this paper,
                 we address the ``incentive issue'' of a cooperative
                 proxy caching system and how to motivate each proxy to
                 provide cache space to the system. To encourage proxies
                 to participate, we propose a ``revenue-rewarding
                 scheme'' to credit the cooperative proxies according to
                 the resources they contribute. A game-theoretic model
                 is used to analyze the interactions among proxies under
                 the revenue-rewarding scheme. We propose two
                 cooperative game settings that lead to optimal
                 situations. In particular, (1) We propose a distributed
                 incentive framework for peers to participate in
                 resource contribution for media streaming; (2) Proxies
                 are encouraged to cooperate under the revenue-rewarding
                 scheme; (3) Profit and social welfare are maximized in
                 these cooperative games; and (4) Cost-effective
                 resource allocation is achieved in these cooperative
                 games. Large scale simulation is carried out to
                 validate and verify the merits of our proposed
                 incentive schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "game-theoretic analysis; incentive mechanism; Nash
                 equilibrium; pricing; resource allocation",
}

@Article{Zhang:2008:AEE,
  author =       "Cha Zhang and Yong Rui and Jim Crawford and Li-Wei
                 He",
  title =        "An automated end-to-end lecture capture and
                 broadcasting system",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324293",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Remote viewing of lectures presented to a live
                 audience is becoming increasingly popular. At the same
                 time, the lectures can be recorded for subsequent
                 on-demand viewing over the Internet. Providing such
                 services, however, is often prohibitive due to the
                 labor-intensive cost of capturing and
                 pre/post-processing. This article presents a complete
                 automated end-to-end system that supports capturing,
                 broadcasting, viewing, archiving and searching of
                 presentations. Specifically, we describe a system
                 architecture that minimizes the pre- and
                 post-production time, and a fully automated lecture
                 capture system called iCam2 that synchronously captures
                 all contents of the lecture, including audio, video,
                 and presentation material. No staff is needed during
                 lecture capture and broadcasting, so the operational
                 cost of the system is negligible. The system has been
                 used on a daily basis for more than 4 years, during
                 which 522 lectures have been captured. These lectures
                 have been viewed over 20,000 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "automated lecture capture; lecture broadcasting;
                 live/on-demand broadcasting",
}

@Article{Nguyen:2008:OIV,
  author =       "Giang Phuong Nguyen and Marcel Worring",
  title =        "Optimization of interactive visual-similarity-based
                 search",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324294",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "At one end of the spectrum, research in interactive
                 content-based retrieval concentrates on machine
                 learning methods for effective use of relevance
                 feedback. On the other end, the information
                 visualization community focuses on effective methods
                 for conveying information to the user. What is lacking
                 is research considering the information visualization
                 and interactive retrieval as truly integrated parts of
                 one content-based search system. In such an integrated
                 system, there are many degrees of freedom like the
                 similarity function, the number of images to display,
                 the image size, different visualization modes, and
                 possible feedback modes. To base the optimal values for
                 all of those on user studies is unfeasible. We
                 therefore develop search scenarios in which tasks and
                 user actions are simulated. From there, the proposed
                 scheme is optimized based on objective constraints and
                 evaluation criteria. In such a manner, the degrees of
                 freedom are reduced and the remaining degrees can be
                 evaluated in user studies. In this article, we present
                 a system that integrates advanced similarity based
                 visualization with active learning. We have performed
                 extensive experimentation on interactive category
                 search with different image collections. The results
                 using the proposed simulation scheme show that indeed
                 the use of advanced visualization and active learning
                 pays off in all of these datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "active learning; interactive search; similarity based
                 visualization",
}

@Article{Hlavacs:2008:HVP,
  author =       "Helmut Hlavacs and Shelley Buchinger",
  title =        "Hierarchical video patching with optimal server
                 bandwidth",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324287.1324295",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:06 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video patching is a way for transporting true
                 video-on-demand, that is, instantaneous without any
                 delay, from a video server to several clients. Instead
                 of sending a unique stream to each newly arriving
                 client, clients share as many multicast transmissions
                 as possible, and are serviced only those parts of the
                 video that they have missed.\par

                 We present a novel video patching scheme using
                 hierarchies of patches. Our scheme minimizes the
                 bandwidth needed by the video server, and may result in
                 the fact that clients receive several streams in
                 parallel. We show analytically that for Poisson arrival
                 our algorithm achieves the optimal possible server
                 bandwidth for all schemes where clients share multicast
                 transmissions.\par

                 We also show, how our approach can be combined with
                 batching. This combination requires less server
                 bandwidth than all fixed start point periodic broadcast
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "batching; server bandwidth; true video-on-demand;
                 video patching",
}

@Article{Chen:2008:ASD,
  author =       "Songqing Chen and Shiping Chen and Huiping Guo and Bo
                 Shen and Sushil Jajodia",
  title =        "Achieving simultaneous distribution control and
                 privacy protection for {Internet} media delivery",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352013",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Massive Internet media distribution demands prolonged
                 continuous consumption of networking and disk
                 bandwidths in large capacity. Many proxy-based Internet
                 media distribution algorithms and systems have been
                 proposed, implemented, and evaluated to address the
                 scalability and performance issue. However, few of them
                 have been used in practice, since two important issues
                 are not satisfactorily addressed. First, existing
                 proxy-based media distribution architectures lack an
                 efficient media distribution control mechanism. Without
                 copyright protection, content providers are hesitant to
                 use proxy-based fast distribution techniques. Second,
                 little has been done to protect client privacy during
                 content accesses on the Internet. Straightforward
                 solutions to address these two issues independently
                 lead to conflicts. For example, to enforce distribution
                 control, only legitimate users should be granted access
                 rights. However, this normally discloses more
                 information (such as which object the client is
                 accessing) other than the client identity, which
                 conflicts with the client's desire for privacy
                 protection. In this article, we propose a unified
                 proxy-based media distribution protocol to effectively
                 address these two problems simultaneously. We further
                 design a set of new algorithms in a cooperative proxy
                 environment where our proposed scheme works efficiently
                 and practically. Simulation-based experiments are
                 conducted to extensively evaluate the proposed system.
                 Preliminary results demonstrate the effectiveness of
                 our proposed strategy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "cooperative proxy; distribution control; media
                 delivery; privacy; proxy caching",
}

%%% Li, Bhanu, and Dong: TOMCCAP 4(2), May 2008, article 10.
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Li:2008:FSE,
  author =       "Rui Li and Bir Bhanu and Anlei Dong",
  title =        "Feature synthesized {EM} algorithm for image
                 retrieval",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352014",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "As a commonly used unsupervised learning algorithm in
                 Content-Based Image Retrieval (CBIR),
                 Expectation-Maximization (EM) algorithm has several
                 limitations, including the curse of dimensionality and
                 the convergence at a local maximum. In this article, we
                 propose a novel learning approach, namely
                 Coevolutionary Feature Synthesized
                 Expectation-Maximization (CFS-EM), to address the above
                 problems. The CFS-EM is a hybrid of coevolutionary
                 genetic programming (CGP) and EM algorithm applied on
                 partially labeled data. CFS-EM is especially suitable
                 for image retrieval because the images can be searched
                 in the synthesized low-dimensional feature space, while
                 a kernel-based method has to make classification
                 computation in the original high-dimensional space.
                 Experiments on real image databases show that CFS-EM
                 outperforms Radial Basis Function Support Vector
                 Machine (RBF-SVM), CGP, Discriminant-EM (D-EM) and
                 Transductive-SVM (TSVM) in the sense of classification
                 performance and it is computationally more efficient
                 than RBF-SVM in the query phase.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "coevolutionary feature synthesis; content-based image
                 retrieval; expectation maximization; semi-supervised
                 learning",
}

%%% Xu, Xu, Duan, Jin, and Luo: TOMCCAP 4(2), May 2008, article 11.
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Xu:2008:AKG,
  author =       "Min Xu and Changsheng Xu and Lingyu Duan and Jesse S.
                 Jin and Suhuai Luo",
  title =        "Audio keywords generation for sports video analysis",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352015",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Sports video has attracted a global viewership.
                 Research effort in this area has been focused on
                 semantic event detection in sports video to facilitate
                 accessing and browsing. Most of the event detection
                 methods in sports video are based on visual features.
                 However, being a significant component of sports video,
                 audio may also play an important role in semantic event
                 detection. In this paper, we have borrowed the concept
                 of the ``keyword'' from the text mining domain to
                 define a set of specific audio sounds. These specific
                 audio sounds refer to a set of game-specific sounds
                 with strong relationships to the actions of players,
                 referees, commentators, and audience, which are the
                 reference points for interesting sports events. Unlike
                 low-level features, audio keywords can be considered as
                 a mid-level representation, able to facilitate
                 high-level analysis from the semantic concept point of
                 view. Audio keywords are created from low-level audio
                 features with learning by support vector machines. With
                 the help of video shots, the created audio keywords can
                 be used to detect semantic events in sports video by
                 Hidden Markov Model (HMM) learning. Experiments on
                 creating audio keywords and, subsequently, event
                 detection based on audio keywords have been very
                 encouraging. Based on the experimental results, we
                 believe that the audio keyword is an effective
                 representation that is able to achieve satisfying
                 results for event detection in sports video.
                 Application in three sports types demonstrates the
                 practicality of the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "audio keywords; event detection; semantics analysis;
                 sports video analysis; support vector machines",
}

%%% Tullimas, Nguyen, Edgecomb, and Cheung: TOMCCAP 4(2), May 2008,
%%% article 12 (the MultiTCP streaming system).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Tullimas:2008:MSU,
  author =       "Sunand Tullimas and Thinh Nguyen and Rich Edgecomb and
                 Sen-ching Cheung",
  title =        "Multimedia streaming using multiple {TCP}
                 connections",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "12:1--12:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352016",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In recent years, multimedia applications over the
                 Internet become increasingly popular. However, packet
                 loss, delay, and time-varying bandwidth of the Internet
                 have remained the major problems for multimedia
                 streaming applications. As such, a number of
                 approaches, including network infrastructure and
                 protocol, source and channel coding, have been proposed
                 to either overcome or alleviate these drawbacks of the
                 Internet. In this article, we propose the MultiTCP
                 system, a receiver-driven, TCP-based system for
                 multimedia streaming over the Internet. Our proposed
                 algorithm aims at providing resilience against short
                 term insufficient bandwidth by using multiple TCP
                 connections for the same application. Our proposed
                 system enables the application to achieve and control
                 the desired sending rate during congested periods,
                 which cannot be achieved using traditional TCP.
                 Finally, our proposed system is implemented at the
                 application layer, and hence, no kernel modification to
                 TCP is necessary. We analyze the proposed system, and
                 present simulation and experimental results to
                 demonstrate its advantages over the traditional
                 single-TCP-based approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "multimedia streaming",
}

%%% Tjondronegoro, Chen, and Joly: TOMCCAP 4(2), May 2008,
%%% article 13 (sports video indexing and XQuery-based retrieval).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Tjondronegoro:2008:SES,
  author =       "Dian Tjondronegoro and Yi-Ping Phoebe Chen and Adrien
                 Joly",
  title =        "A scalable and extensible segment-event-object-based
                 sports video retrieval system",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "13:1--13:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352017",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Sport video data is growing rapidly as a result of the
                 maturing digital technologies that support digital
                 video capture, faster data processing, and large
                 storage. However, (1) semi-automatic content extraction
                 and annotation, (2) scalable indexing model, and (3)
                 effective retrieval and browsing, still pose the most
                 challenging problems for maximizing the usage of large
                 video databases. This article will present the findings
                 from a comprehensive work that proposes a scalable and
                 extensible sports video retrieval system with two major
                 contributions in the area of sports video indexing and
                 retrieval. The first contribution is a new sports video
                 indexing model that utilizes semi-schema-based indexing
                 scheme on top of an Object-Relationship approach. This
                 indexing model is scalable and extensible as it enables
                 gradual index construction which is supported by
                 ongoing development of future content extraction
                 algorithms. The second contribution is a set of novel
                 queries which are based on XQuery to generate dynamic
                 and user-oriented summaries and event structures. The
                 proposed sports video retrieval system has been fully
                 implemented and populated with soccer, tennis,
                 swimming, and diving video. The system has been
                 evaluated against 20 users to demonstrate and confirm
                 its feasibility and benefits. The experimental sports
                 genres were specifically selected to represent the four
                 main categories of sports domain: period-, set-point-,
                 time (race)-, and performance-based sports. Thus, the
                 proposed system should be generic and robust for all
                 types of sports.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "automatic content extraction; indexing; mobile video
                 interaction; MPEG-7; sports video retrieval; video
                 database system; XML; XQuery",
}

%%% Zimmermann and three coauthors: TOMCCAP 4(2), May 2008,
%%% article 14 (data management for distributed immersive
%%% performance).  The abstract holds two paragraphs, separated by
%%% \par and a blank line inside the quoted value.
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Zimmermann:2008:DMP,
  author =       "Roger Zimmermann and Elaine Chew and Sakire Arslan Ay
                 and Moses Pawar",
  title =        "Distributed musical performances: {Architecture} and
                 stream management",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "14:1--14:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352018",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "An increasing number of novel applications produce a
                 rich set of different data types that need to be
                 managed efficiently and coherently. In this article we
                 present our experience with designing and implementing
                 a data management infrastructure for a distributed
                 immersive performance (DIP) application. The DIP
                 project investigates a versatile framework for the
                 capture, recording, and replay of video, audio, and
                 MIDI (Musical Instrument Digital Interface) streams in
                 an interactive environment for collaborative music
                 performance. We are focusing on two classes of data
                 streams that are generated within this environment. The
                 first category consists of high-resolution isochronous
                 media streams, namely audio and video. The second class
                 comprises MIDI data produced by electronic instruments.
                 MIDI event sequences are alphanumeric in nature and
                 fall into the category of the data streams that have
                 been of interest to data management researchers in
                 recent years.\par

                 We present our data management architecture, which
                 provides a repository for all DIP data. Streams of both
                 categories need to be acquired, transmitted, stored,
                 and replayed in real time. Data items are correlated
                 across different streams with temporal indices. The
                 audio and video streams are managed in our own
                 High-performance Data Recording Architecture (HYDRA),
                 which integrates multistream recording and retrieval in
                 a consistent manner. This paper reports on the
                 practical issues and challenges that we encountered
                 during the design, implementation and experimental
                 phases of our prototype. We also present some analysis
                 results and discuss future extensions for the
                 architecture.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "distributed immersive performance; multimedia storage;
                 multimodal data recorder; networked musical
                 performance",
}

%%% Hsu and Hefeeda: TOMCCAP 4(2), May 2008, article 15
%%% (evaluation of rate-distortion models for FGS video).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Hsu:2008:ACR,
  author =       "Cheng-Hsin Hsu and Mohamed Hefeeda",
  title =        "On the accuracy and complexity of rate-distortion
                 models for fine-grained scalable video sequences",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "15:1--15:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352019",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Rate-distortion (R-D) models are functions that
                 describe the relationship between the bitrate and
                 expected level of distortion in the reconstructed video
                 stream. R-D models enable optimization of the received
                 video quality in different network conditions. Several
                 R-D models have been proposed for the increasingly
                 popular fine-grained scalable video sequences. However,
                 the models' relative performance has not been
                 thoroughly analyzed. Moreover, the time complexity of
                 each model is not known, nor is the range of bitrates
                 in which the model produces valid results. This lack of
                 quantitative performance analysis makes it difficult to
                 select the model that best suits a target streaming
                 system. In this article, we classify, analyze, and
                 rigorously evaluate all R-D models proposed for FGS
                 coders in the literature. We classify R-D models into
                 three categories: analytic, empirical, and
                 semi-analytic. We describe the characteristics of each
                 category. We analyze the R-D models by following their
                 mathematical derivations, scrutinizing the assumptions
                 made, and explaining when the assumptions fail and why.
                 In addition, we implement all R-D models, a total of
                 eight, and evaluate them using a diverse set of video
                 sequences. In our evaluation, we consider various
                 source characteristics, diverse channel conditions,
                 different encoding/decoding parameters, different frame
                 types, and several performance metrics including
                 accuracy, range of applicability, and time complexity
                 of each model. We also present clear systematic ways
                 (pseudo codes) for constructing various R-D models from
                 a given video sequence. Based on our experimental
                 results, we present a justified list of recommendations
                 on selecting the best R-D models for video-on-demand,
                 video conferencing, real-time, and peer-to-peer
                 streaming systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "fine-grained scalable coding; multimedia streaming;
                 rate-distortion models",
}

%%% Wang, Kurose, Shenoy, and Towsley: TOMCCAP 4(2), May 2008,
%%% article 16 (analytic performance models for TCP streaming).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Wang:2008:MST,
  author =       "Bing Wang and Jim Kurose and Prashant Shenoy and Don
                 Towsley",
  title =        "Multimedia streaming via {TCP}: an analytic
                 performance study",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "2",
  pages =        "16:1--16:??",
  month =        may,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1352012.1352020",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Jun 16 17:12:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "TCP is widely used in commercial multimedia streaming
                 systems, with recent measurement studies indicating
                 that a significant fraction of Internet streaming media
                 is currently delivered over HTTP/TCP. These
                 observations motivate us to develop analytic
                 performance models to systematically investigate the
                 performance of TCP for both live and stored-media
                 streaming. We validate our models via ns simulations
                 and experiments conducted over the Internet. Our models
                 provide guidelines indicating the circumstances under
                 which TCP streaming leads to satisfactory performance,
                 showing, for example, that TCP generally provides good
                 streaming performance when the achievable TCP
                 throughput is roughly twice the media bitrate, with
                 only a few seconds of startup delay.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "multimedia streaming; performance modeling",
}

%%% Lin, Wang, and Lin: TOMCCAP 4(3), August 2008, article 17
%%% (neural-network link-quality-based handoff).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
%%% Keywords are kept lowercase and in alphabetical order.
@Article{Lin:2008:NNB,
  author =       "Tsungnan Lin and Chiapin Wang and Po-Chiang Lin",
  title =        "A neural-network-based context-aware handoff algorithm
                 for multimedia computing",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "3",
  pages =        "17:1--17:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1386109.1386110",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:12 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The access of multimedia computing in wireless
                 networks is concerned with the performance of handoff
                 because of the irretrievable property of real-time data
                 delivery. To lessen throughput degradation incurred by
                 unnecessary handoffs or handoff latencies leading to
                 media disruption perceived by users, this paper
                 presents a link quality based handoff algorithm. Neural
                 networks are used to learn the cross-layer correlation
                 between the link quality estimator such as packet
                 success rate and the corresponding context metric
                 indicators, for example, the transmitting packet
                 length, received signal strength, and signal to noise
                 ratio. Based on a pre-processed learning of link
                 quality profile, neural networks make essential handoff
                 decisions efficiently with the evaluations of link
                 quality instead of the comparisons between relative
                 signal strength. The experiment and simulation results
                 show that the proposed algorithm improves the user
                 perceived qualities in a transmission scenario of VoIP
                 applications by minimizing both the number of lost
                 packets and unnecessary handoffs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "context-aware; handoff; multimedia computing; neural
                 networks",
}

%%% Franke and five coauthors: TOMCCAP 4(3), August 2008,
%%% article 18 (single- versus multi-perspective images, with an
%%% eye tracking study).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
%%% Keywords are kept lowercase and in alphabetical order.
@Article{Franke:2008:TAC,
  author =       "Ingmar S. Franke and Sebastian Pannasch and Jens R.
                 Helmert and Robert Rieger and Rainer Groh and Boris M.
                 Velichkovsky",
  title =        "Towards attention-centered interfaces: an aesthetic
                 evaluation of perspective with eye tracking",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "3",
  pages =        "18:1--18:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1386109.1386111",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:12 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The established method of representing
                 three-dimensional space on a two-dimensional surface
                 involves camera based, point of regard systems,
                 comparable in design to the early ``camera obscura''.
                 However, geometrical limitations of such models lead to
                 distortions of perspective when projected. This
                 research investigated the influence of single- versus
                 multi-perspectives on aesthetic choices within one
                 image. A clear perceptual bias towards
                 multi-perspective images was found, additionally
                 supported by an eye tracking study. We propose that
                 human users are more attracted by multi-perspective
                 images, which emphasize the ``semantic foci'' of the
                 scene, than by those being synthesized statically with
                 only one geometrical prospect.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "eye tracking; perspective projection; scene
                 perception; subjective evaluation",
}

%%% Wu, Li, and Zhao: TOMCCAP 4(3), August 2008, article 19
%%% (the Magellan study of P2P live streaming topologies).  The
%%% abstract holds two paragraphs, separated by \par and a blank
%%% line inside the quoted value.
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
%%% Keywords are kept lowercase and in alphabetical order.
@Article{Wu:2008:ELS,
  author =       "Chuan Wu and Baochun Li and Shuqiao Zhao",
  title =        "Exploring large-scale peer-to-peer live streaming
                 topologies",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "3",
  pages =        "19:1--19:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1386109.1386112",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:12 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Real-world live peer-to-peer (P2P) streaming
                 applications have been successfully deployed in the
                 Internet, delivering live multimedia content to
                 millions of users at any given time. With relative
                 simplicity in design with respect to peer selection and
                 topology construction protocols and without much
                 algorithmic sophistication, current-generation live P2P
                 streaming applications are able to provide users with
                 adequately satisfying viewing experiences. That said,
                 little existing research has provided sufficient
                 insights on the time-varying internal characteristics
                 of peer-to-peer topologies in live streaming. This
                 article presents {\em Magellan}, our collaborative work
                 with UUSee Inc., Beijing, China, for exploring and
                 charting graph theoretical properties of practical P2P
                 streaming topologies, gaining important insights in
                 their topological dynamics over a long period of
                 time.\par

                 With more than 120 GB worth of traces starting
                 September 2006 from a commercially deployed P2P live
                 streaming system that represents UUSee's core product,
                 we have completed a thorough and in-depth investigation
                 of the topological properties in large-scale live P2P
                 streaming, as well as their evolutionary behavior over
                 time, for example, at different times of the day and in
                 flash crowd scenarios. We seek to explore real-world
                 P2P streaming topologies with respect to their graph
                 theoretical metrics, such as the degree, clustering
                 coefficient, and reciprocity. In addition, we compare
                 our findings with results from existing studies on
                 topological properties of P2P file sharing
                 applications, and present new and unique observations
                 specific to streaming. We have observed that live P2P
                 streaming sessions demonstrate excellent scalability, a
                 high level of reciprocity, a clustering phenomenon in
                 each ISP, and a degree distribution that does {\em
                 not\/} follow the power-law distribution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "peer-to-peer streaming; topology characterization",
}

%%% Goel, Krasic, and Walpole: TOMCCAP 4(3), August 2008,
%%% article 20 (send-buffer tuning for low-latency TCP streaming).
%%% The unquoted values j-TOMCCAP and ack-nhfb are string
%%% abbreviations defined outside this entry; the question marks in
%%% pages and CODEN stand in for values not recorded here.
@Article{Goel:2008:LLA,
  author =       "Ashvin Goel and Charles Krasic and Jonathan Walpole",
  title =        "Low-latency adaptive streaming over {TCP}",
  journal =      j-TOMCCAP,
  volume =       "4",
  number =       "3",
  pages =        "20:1--20:??",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1386109.1386113",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:12 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Media streaming over TCP has become increasingly
                 popular because TCP's congestion control provides
                 remarkable stability to the Internet. Streaming over
                 TCP requires adapting to bandwidth availability, but
                 unfortunately, TCP can introduce significant latency at
                 the application level, which causes unresponsive and
                 poor adaptation. This article shows that this latency
                 is not inherent in TCP but occurs as a result of
                 throughput-optimized TCP implementations. We show that
                 this latency can be minimized by dynamically tuning
                 TCP's send buffer. Our evaluation shows that this
                 approach leads to better application-level adaptation
                 and it allows supporting interactive and other
                 low-latency applications over TCP.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "low latency streaming; multimedia applications; TCP",
}

@article{Lim:2008:DPP,
  author = {Seung-Ho Lim and Yo-Won Jeong and Kyu Ho Park},
  title = {Data placement and prefetching with accurate bit rate
    control for interactive media server},
  journal = j-TOMCCAP,
  volume = {4},
  number = {3},
  pages = {21:1--21:??},
  articleno = {21},
  month = aug,
  year = {2008},
  doi = {https://doi.org/10.1145/1386109.1386114},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {bit count control; disk array; Interactive media
    server; stripe size; video rate},
  abstract = {An interactive Media Server should support
    unrestricted control to viewers with their service
    level agreements. It is important to manage video data
    effectively to facilitate efficient retrieval. In this
    paper, we propose an efficient placement algorithm as
    part of an effective retrieval scheme to increase the
    number of clients who can be provided with interactive
    service. The proposed management schemes are
    incorporated with a bit count control method that is
    based on repeated tuning of quantization parameters to
    adjust the actual bit count to the target bit count.
    The encoder using this method can generate coded frames
    whose sizes are synchronized with the RAID stripe size,
    so that when various fast-forward levels are accessed
    we can reduce the seek and rotational latency and
    enhance the disk throughput of each disk in the RAID
    system. Experimental results demonstrate that the
    proposed schemes can significantly improve the average
    service time and guarantee more users service of
    quality, and the interactive media server can thereby
    efficiently service a large number of clients.},
  bibdate = {Tue Mar 16 18:51:12 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Jie:2008:VGD,
  author = {Li Jie and James J. Clark},
  title = {Video game design using an eye-movement-dependent
    model of visual attention},
  journal = j-TOMCCAP,
  volume = {4},
  number = {3},
  pages = {22:1--22:??},
  articleno = {22},
  month = aug,
  year = {2008},
  doi = {https://doi.org/10.1145/1386109.1386115},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {Entertainment; eye movements; eye tracking; HCI; video
    games; visual attention},
  abstract = {Eye movements can be used to infer the allocation of
    covert attention. In this article, we propose to model
    the allocation of attention in a task-dependent manner
    based on different eye movement conditions,
    specifically fixation and pursuit. We show that the
    image complexity at eye fixation points during
    fixation, and the pursuit direction during pursuit are
    significant factors in attention allocation. Results of
    the study are applied to the design of an interactive
    computer game. Real-time eye movement information is
    taken as one of inputs for the game. The utility of
    such eye information for controlling game difficulty is
    shown.},
  bibdate = {Tue Mar 16 18:51:12 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Komogortsev:2008:PRT,
  author = {Oleg V. Komogortsev and Javed I. Khan},
  title = {Predictive real-time perceptual compression based on
    eye-gaze-position analysis},
  journal = j-TOMCCAP,
  volume = {4},
  number = {3},
  pages = {23:1--23:??},
  articleno = {23},
  month = aug,
  year = {2008},
  doi = {https://doi.org/10.1145/1386109.1386116},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {human visual system; Real-time multimedia
    compression},
  abstract = {This article designs a real-time perceptual
    compression system (RTPCS) based on eye-gaze-position
    analysis. Our results indicate that the
    eye-gaze-position containment metric provides more
    efficient and effective evaluation of an RTPCS than the
    eye fixation containment. The presented RTPCS is
    designed for a network communication scenario with a
    feedback loop delay. The proposed RTPCS uses human
    visual system properties to compensate for the delay
    and to provide high ratios of multimedia compression.},
  bibdate = {Tue Mar 16 18:51:12 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Cesar:2008:ISI,
  author = {Pablo Cesar and Dick C. A. Bulterman and Luiz Fernando
    Gomes Soares},
  title = {Introduction to special issue: {Human-centered}
    television --- directions in interactive digital
    television research},
  journal = j-TOMCCAP,
  volume = {4},
  number = {4},
  pages = {24:1--24:??},
  articleno = {24},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1412196.1412197},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {Interactive television; shared experiences; standards;
    survey},
  abstract = {The research area of interactive digital TV is in the
    midst of a significant revival. Unlike the first
    generation of digital TV, which focused on producer
    concerns that effectively limited (re)distribution, the
    current generation of research is closely linked to the
    role of the user in selecting, producing, and
    distributing content. The research field of interactive
    digital television is being transformed into a study of
    human-centered television. Our guest editorial reviews
    relevant aspects of this transformation in the three
    main stages of the content lifecycle: content
    production, content delivery, and content consumption.
    While past research on content production tools focused
    on full-fledged authoring tools for professional
    editors, current research studies lightweight, often
    informal end-user authoring systems. In terms of
    content delivery, user-oriented infrastructures such as
    peer-to-peer are being seen as alternatives to more
    traditional broadcast solutions. Moreover, end-user
    interaction is no longer limited to content selection,
    but now facilitates nonlinear participatory television
    productions. Finally, user-to-user communication
    technologies have allowed television to become a
    central component of an interconnected social
    experience. The background context given in this
    article provides a framework for appreciating the
    significance of four detailed contributions that
    highlight important directions in transforming
    interactive television research.},
  bibdate = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Ursu:2008:ITN,
  author = {Marian F. Ursu and Maureen Thomas and Ian Kegel and
    Doug Williams and Mika Tuomola and Inger Lindstedt and
    Terence Wright and Andra Leurdijk and Vilmos Zsombori
    and Julia Sussner and Ulf Myrestam and Nina Hall},
  title = {Interactive {TV} narratives: {Opportunities},
    progress, and challenges},
  journal = j-TOMCCAP,
  volume = {4},
  number = {4},
  pages = {25:1--25:??},
  articleno = {25},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1412196.1412198},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {computational narrativity; digital storytelling;
    entertainment; Interactive; media; narrativity;
    nonlinear; screen media; shapeshifting; television},
  abstract = {This article is motivated by the question whether
    television should do more than simply offer interactive
    services alongside (and separately from) traditional
    linear programs, in the context of its dominance being
    seriously challenged and threatened by interactive
    forms of screen media entertainment. It suggests: yes.
    Interactive {\em narrativity}, that is, the ability to
    interact with (and influence) stories whilst they are
    being told, represents one clear development path for
    interactive television. The capabilities of computing
    technology are ripe for exploring this new form of
    storytelling, from creation to commercial distribution.
    The article starts by looking at the relationship
    between narrativity and interactivity in the current
    context of screen media, and identifies clear signs of
    interest from certain European public broadcasters in
    interactive TV narratives. It then presents in detail
    four recent experimental interactive TV productions in
    the genres of drama, news, and documentary, developed
    in collaboration with public broadcasters, which
    illustrate the potential and richness of this new form
    of storytelling, but also highlight new technological
    capabilities necessary for such productions. A number
    of essential technological requirements are then
    discussed in more detail in the final part. The article
    suggests that the ShapeShifting Media Technology,
    employed in the implementation of the four productions,
    has made significant advances both at the technological
    and the creative ends in supporting the development of
    interactive TV narrativity, but, however, that further
    developments are required before being able to answer
    questions such as ``Would end users want such a form of
    screen media entertainment?'' and ``Would it be
    effective for both end users and producers?''},
  bibdate = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Cheng:2008:GIP,
  author = {Bin Cheng and Lex Stein and Hai Jin and Xiaofei Liao
    and Zheng Zhang},
  title = {{GridCast}: {Improving} peer sharing for {P2P VoD}},
  journal = j-TOMCCAP,
  volume = {4},
  number = {4},
  pages = {26:1--26:??},
  articleno = {26},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1412196.1412199},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {caching; peer-to-peer; replication; Video-on-demand},
  abstract = {Video-on-Demand (VoD) is a compelling application, but
    costly. VoD is costly due to the load it places on
    video source servers. Many have proposed using
    peer-to-peer (P2P) techniques to shift load from
    servers to peers. Yet, nobody has implemented and
    deployed a system to openly and systematically evaluate
    how these techniques work.\par

    This article describes the design, implementation and
    evaluation of GridCast, a real deployed P2P VoD system.
    GridCast has been live on CERNET since May of 2006. It
    provides seek, pause, and play operations, and employs
    peer sharing to improve system scalability. In peak
    months, GridCast has served videos to 23,000 unique
    users. From the first deployment, we have gathered
    information to understand the system and evaluate how
    to further improve peer sharing through caching and
    replication.\par

    We first show that GridCast with single video caching
    (SVC) can decrease load on source servers by an average
    of 22\% from a client-server architecture. We analyze
    the net effect on system resources and determine that
    peer upload is largely idle. This leads us to changing
    the caching algorithm to cache multiple videos (MVC).
    MVC decreases source load by an average of 51\% over
    the client-server. The improvement is greater as user
    load increases. This bodes well for peer-assistance at
    larger scales.\par

    A detailed analysis of MVC shows that departure misses
    become a major issue in a P2P VoD system with caching
    optimization. Motivated by this observation, we examine
    how to use replication to eliminate departure misses
    and further reduce server load. A framework for lazy
    replication is presented and evaluated in this article.
    In this framework, two predictors are plugged in to
    create the working replication algorithm. With these
    two simple predictors, lazy replication can decrease
    server load by 15\% from MVC with only a minor increase
    in network traffic.},
  bibdate = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Metcalf:2008:EPL,
  author = {Crysta Metcalf and Gunnar Harboe and Joe Tullio and
    Noel Massey and Guy Romano and Elaine M. Huang and
    Frank Bentley},
  title = {Examining presence and lightweight messaging in a
    social television experience},
  journal = j-TOMCCAP,
  volume = {4},
  number = {4},
  pages = {27:1--27:??},
  articleno = {27},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1412196.1412200},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {ambient displays; awareness displays;
    computer-mediated communication; Social television},
  abstract = {We report on a field evaluation of a prototype social
    television system (Social TV) that incorporates
    lightweight messaging as well as ambient awareness of
    user presence on the system. This evaluation was
    conducted over a two-week period and involved the
    participation of ten households. Participants
    appreciated the ability to see their buddies' presence
    on the system, the ability to see or suggest the
    programs they were currently watching, and the ability
    to send short messages to one another. The presence
    facilities available in Social TV also allowed
    participants to learn more about one another's TV
    viewing habits and preferences, and fostered a sense of
    connectedness between them. However, they also felt
    constrained by the limitations of the communication
    options available to them and demanded free-form text
    or voice chat to be able to fully express themselves.},
  bibdate = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Cattelan:2008:WCP,
  author = {Renan G. Cattelan and Cesar Teixeira and Rudinei
    Goularte and Maria Da Gra{\c{c}}a C. Pimentel},
  title = {Watch-and-comment as a paradigm toward ubiquitous
    interactive video editing},
  journal = j-TOMCCAP,
  volume = {4},
  number = {4},
  pages = {28:1--28:??},
  articleno = {28},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1412196.1412201},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {Annotation; Ginga-NCL; interactive digital video; P2P
    collaboration},
  abstract = {The literature reports research efforts allowing the
    editing of interactive TV multimedia documents by
    end-users. In this article we propose complementary
    contributions relative to end-user generated
    interactive video, video tagging, and collaboration. In
    earlier work we proposed the {\em watch-and-comment\/}
    (WaC) paradigm as the seamless capture of an
    individual's comments so that corresponding annotated
    interactive videos be automatically generated. As a
    proof of concept, we implemented a prototype
    application, the WaCTool, that supports the capture of
    digital ink and voice comments over individual frames
    and segments of the video, producing a declarative
    document that specifies both: different media stream
    structure and synchronization.\par

    In this article, we extend the WaC paradigm in two
    ways. First, user-video interactions are associated
    with edit commands and digital ink operations. Second,
    focusing on collaboration and distribution issues, we
    employ annotations as simple containers for context
    information by using them as tags in order to organize,
    store and distribute information in a P2P-based
    multimedia capture platform. We highlight the design
    principles of the watch-and-comment paradigm, and
    demonstrate related results including the current
    version of the WaCTool and its architecture. We also
    illustrate how an interactive video produced by the
    WaCTool can be rendered in an interactive video
    environment, the Ginga-NCL player, and include results
    from a preliminary evaluation.},
  bibdate = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Bailey:2008:SSA,
  author = {Brian P. Bailey and Nicu Sebe and Alan Hanjalic},
  title = {Special section from the {ACM Multimedia Conference
    2007}},
  journal = j-TOMCCAP,
  volume = {5},
  number = {1},
  pages = {1:1--1:??},
  articleno = {1},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1404880.1404881},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  bibdate = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Gleicher:2008:RCI,
  author = {Michael L. Gleicher and Feng Liu},
  title = {Re-cinematography: {Improving} the camerawork of
    casual video},
  journal = j-TOMCCAP,
  volume = {5},
  number = {1},
  pages = {2:1--2:??},
  articleno = {2},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1404880.1404882},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {casual video; cinematography; Image stabilization},
  abstract = {This article presents an approach to postprocessing
    casually captured videos to improve apparent camera
    movement. {\em Re-cinematography\/} transforms each
    frame of a video such that the video better follows
    cinematic conventions. The approach breaks a video into
    shorter segments. Segments of the source video where
    there is no intentional camera movement are made to
    appear as if the camera is completely static. For
    segments with camera motions, camera paths are
    keyframed automatically and interpolated with matrix
    logarithms to give velocity-profiled movements that
    appear intentional and directed. Closeups are inserted
    to provide compositional variety in otherwise uniform
    segments. The approach automatically balances the
    tradeoff between motion smoothness and distortion to
    the original imagery. Results from our prototype show
    improvements to poor quality home videos.},
  bibdate = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Qi:2008:CMV,
  author = {Guo-Jun Qi and Xian-Sheng Hua and Yong Rui and Jinhui
    Tang and Tao Mei and Meng Wang and Hong-Jiang Zhang},
  title = {Correlative multilabel video annotation with temporal
    kernels},
  journal = j-TOMCCAP,
  volume = {5},
  number = {1},
  pages = {3:1--3:??},
  articleno = {3},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1404880.1404883},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {concept correlation; multilabeling; temporal kernel;
    Video annotation},
  abstract = {Automatic video annotation is an important ingredient
    for semantic-level video browsing, search and
    navigation. Much attention has been paid to this topic
    in recent years. These researches have evolved through
    two paradigms. In the first paradigm, each concept is
    individually annotated by a pre-trained binary
    classifier. However, this method ignores the rich
    information between the video concepts and only
    achieves limited success. Evolved from the first
    paradigm, the methods in the second paradigm add an
    extra step on the top of the first individual
    classifiers to fuse the multiple detections of the
    concepts. However, the performance of these methods can
    be degraded by the error propagation incurred in the
    first step to the second fusion one. In this article,
    another paradigm of the video annotation method is
    proposed to address these problems. It simultaneously
    annotates the concepts as well as model correlations
    between them in one step by the proposed {\em
    Correlative Multilabel\/} (CML) method, which benefits
    from the compensation of complementary information
    between different labels. Furthermore, since the video
    clips are composed by temporally ordered frame
    sequences, we extend the proposed method to exploit the
    rich temporal information in the videos. Specifically,
    a temporal-kernel is incorporated into the CML method
    based on the discriminative information between {\em
    Hidden Markov Models\/} (HMMs) that are learned from
    the videos. We compare the performance between the
    proposed approach and the state-of-the-art approaches
    in the first and second paradigms on the widely used
    TRECVID data set. As to be shown, superior performance
    of the proposed method is gained.},
  bibdate = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@article{Chen:2008:DDN,
  author = {Yinpeng Chen and Weiwei Xu and Hari Sundaram and
    Thanassis Rikakis and Sheng-Min Liu},
  title = {A dynamic decision network framework for online media
    adaptation in stroke rehabilitation},
  journal = j-TOMCCAP,
  volume = {5},
  number = {1},
  pages = {4:1--4:??},
  articleno = {4},
  month = oct,
  year = {2008},
  doi = {https://doi.org/10.1145/1404880.1404884},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  coden = {????},
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords = {Biofeedback; dynamic decision network; media
    adaptation; mixture of experts},
  abstract = {In this article, we present a media adaptation
    framework for an immersive biofeedback system for
    stroke patient rehabilitation. In our biofeedback
    system, media adaptation refers to changes in
    audio/visual feedback as well as changes in physical
    environment. Effective media adaptation frameworks help
    patients recover generative plans for arm movement with
    potential for significantly shortened therapeutic time.
    The media adaptation problem has significant challenges
    --- (a) high dimensionality of adaptation parameter
    space; (b) variability in the patient performance
    across and within sessions; (c) the actual
    rehabilitation plan is typically a non-first-order
    Markov process, making the learning task hard.\par

    Our key insight is to understand media adaptation as a
    real-time feedback control problem. We use a
    mixture-of-experts based Dynamic Decision Network (DDN)
    for online media adaptation. We train DDN mixtures per
    patient, per session. The mixture models address two
    basic questions --- (a) given a specific adaptation
    suggested by the domain experts, predict the patient
    performance, and (b) given the expected performance,
    determine the optimal adaptation decision. The
    questions are answered through an optimality criterion
    based search on DDN models trained in previous
    sessions. We have also developed new validation metrics
    and have very good results for both questions on actual
    stroke rehabilitation data.},
  bibdate = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Thouin:2008:EAV,
  author =       "Frederic Thouin and Mark Coates",
  title =        "Equipment allocation in video-on-demand network
                 deployments",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1404880.1404885",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:49 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video-on-Demand (VoD) services are very user-friendly,
                 but also complex and resource demanding. Deployments
                 involve careful design of many mechanisms where content
                 attributes and usage models should be taken into
                 account. We define, and propose a methodology to solve,
                 the {\em VoD Equipment Allocation Problem\/} of
                 determining the number and type of streaming servers
                 with directly attached storage (VoD servers) to install
                 at each potential location in a metropolitan area
                 network topology such that deployment costs are
                 minimized. We develop a cost model for VoD deployments
                 based on streaming, storage and transport costs and
                 train a parametric function that maps the amount of
                 available storage to a worst-case hit ratio. We observe
                 the impact of having to determine the amount of storage
                 and streaming cojointly, and determine the minimum
                 demand required to deploy replicas as well as the
                 average hit ratio at each location. We observe that
                 common video-on-demand server configurations lead to
                 the installation of excessive storage, because a
                 relatively high hit-ratio can be achieved with small
                 amounts of storage so streaming requirements
                 dominate.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "equipment allocation; optimization; resource
                 allocation; Video-on-demand",
}

@Article{Kolan:2008:NLV,
  author =       "Prakash Kolan and Ram Dantu and Jo{\~a}o W. Cangussu",
  title =        "Nuisance level of a voice call",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "1",
  pages =        "6:1--6:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1404880.1404886",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:49 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In our everyday life, we communicate with many people
                 such as family, friends, neighbors, and colleagues. We
                 communicate with them using different communication
                 media such as email, telephone calls, and face-to-face
                 interactions. While email is not real-time and
                 face-to-face communications require geographic
                 proximity, voice and video communications are preferred
                 over other modes of communication. However, real-time
                 voice/video calls may create nuisance to the receiver.
                 In this article, we describe a mathematical model for
                 computing nuisance level of incoming voice/video calls.
                 We computed the closeness and nuisance level using the
                 calling patterns between the caller and the callee. To
                 validate the nuisance model, we collected cell phone
                 call records of real-life people at our university and
                 computed the nuisance value for all voice calls. We
                 validated the nuisance levels using the feedback from
                 those real-life people. Such a nuisance model is useful
                 for predicting unwanted voice and video sessions in an
                 IP communication network.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "behavior; Multimedia communications; nuisance;
                 presence; security; tolerance; unwantedness",
}

@Article{Zheng:2008:CVP,
  author =       "Qing-Fang Zheng and Wen Gao",
  title =        "Constructing visual phrases for effective and
                 efficient object-based image retrieval",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "1",
  pages =        "7:1--7:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1404880.1404887",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:49 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The explosion of multimedia data necessitates
                 effective and efficient ways for us to get access to
                 our desired ones. In this article, we draw an analogy
                 between image retrieval and text retrieval and propose
                 a visual phrase-based approach to retrieve images
                 containing desired objects (object-based image
                 retrieval). The visual phrase is defined as a pair of
                 frequently co-occurred adjacent local image patches and
                 is constructed using data mining. We design methods on
                 how to construct visual phrase and how to index/search
                 images based on visual phrase. We demonstrate
                 experiments to show our visual phrase-based approach
                 can be very efficient and more effective than current
                 visual word-based approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Content-based image retrieval; inverted index; local
                 image descriptor; object-based image retrieval; SIFT;
                 visual phrase",
}

@Article{Gill:2008:SDM,
  author =       "Phillipa Gill and Liqi Shi and Anirban Mahanti and
                 Zongpeng Li and Derek L. Eager",
  title =        "Scalable on-demand media streaming for heterogeneous
                 clients",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "1",
  pages =        "8:1--8:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1404880.1404888",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:49 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Periodic broadcast protocols enable efficient
                 streaming of highly popular media files to large
                 numbers of concurrent clients. Most previous periodic
                 broadcast protocols, however, assume that all clients
                 can receive at the same rate, and also assume that
                 reception bandwidth is not time-varying. In this
                 article, we first develop a new periodic broadcast
                 protocol, Optimized Heterogeneous Periodic Broadcast
                 (OHPB), that can be optimized for a given population of
                 clients with heterogeneous reception bandwidths and
                 quality-of-service requirements. The OHPB protocol
                 utilizes an optimized segment size progression
                 determined by solving a linear optimization model that
                 takes as input the client population characteristics
                 and an objective function such as mean client startup
                 delay. We then develop a generalization of the OHPB
                 linear optimization model that allows optimal server
                 bandwidth allocation among multiple concurrent OHPB
                 broadcasts, wherein each media file and its clients may
                 have different characteristics. Finally, we propose
                 complementary client protocols employing work-ahead
                 buffering of data during playback, so as to enable more
                 uniform playback quality when the reception bandwidth
                 is time-varying.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "linear programming; periodic broadcasts;
                 quality-of-service; Scalable streaming",
}

@Article{Jung:2008:SSL,
  author =       "Dawoon Jung and Jaegeuk Kim and Jin-Soo Kim and
                 Joonwon Lee",
  title =        "{ScaleFFS}: a scalable log-structured flash file
                 system for mobile multimedia systems",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "1",
  pages =        "9:1--9:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1404880.1404889",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:51:49 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "NAND flash memory has become one of the most popular
                 storage media for mobile multimedia systems. A key
                 issue in designing storage systems for mobile
                 multimedia systems is handling large-capacity storage
                 media and numerous large files with limited resources
                 such as memory. However, existing flash file systems,
                 including JFFS2 and YAFFS in particular, exhibit many
                 limitations in addressing the storage capacity of
                 mobile multimedia systems.\par

                 In this article, we design and implement a scalable
                 flash file system, called ScaleFFS, for mobile
                 multimedia systems. ScaleFFS is designed to require
                 only a small fixed amount of memory space and to
                 provide fast mount time, even if the file system size
                 grows to more than tens of gigabytes. The measurement
                 results show that ScaleFFS can be instantly mounted
                 regardless of the file system size, while achieving the
                 same write bandwidth and up to 22\% higher read
                 bandwidth compared to JFFS2.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "File system; flash memory; NAND; storage system",
}

@Article{Moncrieff:2008:DPA,
  author =       "Simon Moncrieff and Svetha Venkatesh and Geoff West",
  title =        "Dynamic privacy assessment in a smart house
                 environment using multimodal sensing",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "10:1--10:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413863",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Surveillance applications in private environments such
                 as smart houses require a privacy management policy if
                 such systems are to be accepted by the occupants of the
                 environment. This is due to the invasive nature of
                 surveillance, and the private nature of the home. In
                 this article, we propose a framework for dynamically
                 altering the privacy policy applied to the monitoring
                 of a smart house based on the situation within the
                 environment. Initially the situation, or context,
                 within the environment is determined; we identify
                 several factors for determining environmental context,
                 and propose methods to quantify the context using audio
                 and binary sensor data. The context is then mapped to
                 an appropriate privacy policy, which is implemented by
                 applying data hiding techniques to control access to
                 data gathered from various information sources. The
                 significance of this work lies in the examination of
                 privacy issues related to assisted-living smart house
                 environments. A single privacy policy in such
                 applications would be either too restrictive for an
                 observer, for example, a carer, or too invasive for the
                 occupants. We address this by proposing a dynamic
                 method, with the aim of decreasing the invasiveness of
                 the technology, while retaining the purpose of the
                 system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Assisted living; audio; context aware; privacy;
                 surveillance and monitoring",
}

@Article{Adams:2008:SUS,
  author =       "Brett Adams and Dinh Phung and Svetha Venkatesh",
  title =        "Sensing and using social context",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413864",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We present online algorithms to extract social
                 context: Social spheres are labeled locations of
                 significance, represented as convex hulls extracted
                 from GPS traces. Colocation is determined from
                 Bluetooth and GPS to extract social rhythms, patterns
                 in time, duration, place, and people corresponding to
                 real-world activities. Social ties are formulated from
                 proximity and shared spheres and rhythms. Quantitative
                 evaluation is performed for 10+ million samples over 45
                 man-months. Applications are presented with assessment
                 of perceived utility: {\em Socio-Graph}, a video and
                 photo browser with filters for social metadata, and
                 {\em Jive}, a blog browser that uses rhythms to
                 discover similarity between entries automatically.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Multimedia browsing; social context",
}

@Article{Mohanty:2008:IWB,
  author =       "Saraju P. Mohanty and Bharat K. Bhargava",
  title =        "Invisible watermarking based on creation and robust
                 insertion-extraction of image adaptive watermarks",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "12:1--12:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413865",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel invisible robust
                 watermarking scheme for embedding and extracting a
                 digital watermark in an image. The novelty lies in
                 determining a perceptually important subimage in the
                 host image. Invisible insertion of the watermark is
                 performed in the most significant region of the host
                 image such that tampering of that portion with an
                 intention to remove or destroy will degrade the
                 esthetic quality and value of the image. One feature of
                 the algorithm is that this subimage is used as a region
                 of interest for the watermarking process and eliminates
                 the chance of watermark removal. Another feature of the
                 algorithm is the creation of a compound watermark using
                 the input user watermark (logo) and attributes of the
                 host image. This facilitates the homogeneous fusion of
                 a watermark with the cover image, preserves the quality
                 of the host image, and allows robust
                 insertion-extraction. Watermark creation consists of
                 two distinct phases. During the first phase, a
                 statistical image is synthesized from a perceptually
                 important subimage of the image. A compound watermark
                 is created by embedding a watermark (logo) into the
                 statistical synthetic image by using a visible
                 watermarking technique. This compound watermark is
                 invisibly embedded into the important block of the host
                 image. The authentication process involves extraction
                 of the perceptive logo as well as statistical testing for
                 two-layer evidence. Results of the experimentation
                 using standard benchmarks demonstrate the robustness
                 and efficacy of the proposed watermarking approach.
                 Ownership proof could be established under various
                 hostile attacks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "content protection; copyright protection; image;
                 invisible watermarking; Watermarking",
}

@Article{Yiu:2008:ODC,
  author =       "Wai-Pun Ken Yiu and Shueng-Han Gary Chan",
  title =        "Offering data confidentiality for multimedia overlay
                 multicast: {Design} and analysis",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "13:1--13:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413866",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Application layer multicast (ALM) has been proposed to
                 overcome current limitations in IP multicast for
                 large-group multimedia communication. We address
                 offering data confidentiality tailored for ALM. To
                 achieve confidentiality, a node may need to
                 continuously {\em re-encrypt\/} packets before
                 forwarding them downstream. Furthermore, keys have to
                 be changed whenever there is a membership change,
                 leading to {\em rekey\/} processing overhead at the
                 nodes. For a large and dynamic group, these
                 reencryption and rekeying operations incur high
                 processing overhead at the nodes. We propose and
                 analyze a scalable scheme called Secure Overlay
                 Multicast (SOM) which clusters ALM peers so as to
                 localize rekeying within a cluster and to limit
                 re-encryption at cluster boundaries, thereby minimizing
                 the total nodal processing overhead. We describe the
                 operations of SOM and compare its nodal processing
                 overhead with two other basic approaches, namely,
                 host-to-host encryption and whole group encryption. We
                 also present a simplified analytic model for SOM and
                 show that there exists an optimal cluster size to
                 minimize the total nodal processing overhead. By
                 comparing with a recently proposed ALM scheme (DT
                 protocol), SOM achieves a substantial reduction in
                 nodal processing overhead with similar network
                 performance in terms of network stress and delay.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Key management; multicast security; overlay multicast;
                 performance analysis",
}

@Article{Nakayama:2008:ECR,
  author =       "Minoru Nakayama and Yosiyuki Takahasi",
  title =        "Estimation of certainty for responses to
                 multiple-choice questionnaires using eye movements",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "14:1--14:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413867",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "To examine the feasibility of estimating the degree of
                 strength of belief (SOB) of responses using eye
                 movements, the scan paths of eye movements were
                 analyzed while subjects reviewed their own responses to
                 multiple choice tasks. All fixation points of eye
                 movements were classified into visual areas, or cells,
                 which corresponded with the positions of answers. Two
                 estimation procedures are proposed using eye-movement
                 data. The first one is identifying SOB using scan-path
                 transitions. By comparing subjects' reports of high and
                 low SOB and eye-movement estimations, a significant
                 correct rate of discrimination of SOB was observed.
                 When the threshold of discrimination was controlled, a
                 high rate of correct responses was obtained if it was
                 set at a low level.\par

                 The second procedure is conducting SOB discrimination
                 using support vector machines (SVM) trained with
                 features of fixations. Subjects' gazing features were
                 analyzed while they reviewed their own responses. A
                 discrimination model for SOB was trained with several
                 combinations of features to see whether performance of
                 a significant level could be obtained. As a result, a
                 trained model with 3 features (which consist of
                 interval time, vertical difference, and length between
                 fixations) can provide significant discrimination
                 performance for SOB.\par

                 These results provide evidence that strength of belief
                 can be estimated using eye movements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "certainty; Eye-movements; scan-path analysis; support
                 vector machines",
}

@Article{Shipman:2008:AVG,
  author =       "Frank Shipman and Andreas Girgensohn and Lynn Wilcox",
  title =        "Authoring, viewing, and generating hypervideo: an
                 overview of {Hyper-Hitchcock}",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "15:1--15:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413868",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Hyper-Hitchcock consists of three components for
                 creating and viewing a form of interactive video called
                 detail-on-demand video: a hypervideo editor, a
                 hypervideo player, and algorithms for automatically
                 generating hypervideo summaries. Detail-on-demand video
                 is a form of hypervideo that supports one hyperlink at
                 a time for navigating between video sequences. The
                 Hyper-Hitchcock editor enables authoring of
                 detail-on-demand video without programming and uses
                 video processing to aid in the authoring process. The
                 Hyper-Hitchcock player uses labels and keyframes to
                 support navigation through and back hyperlinks.
                 Hyper-Hitchcock includes techniques for automatically
                 generating hypervideo summaries of one or more videos
                 that take the form of multiple linear summaries of
                 different lengths with links from the shorter to the
                 longer summaries. User studies on authoring and viewing
                 provided insight into the various roles of links in
                 hypervideo and found that player interface design
                 greatly affects people's understanding of hypervideo
                 structure and the video they access.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Hypervideo; link generation; video editing; video
                 summarization",
}

@Article{He:2008:EED,
  author =       "Wenbo He and Klara Nahrstedt and Xue Liu",
  title =        "End-to-end delay control of multimedia applications
                 over multihop wireless links",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "2",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1413862.1413869",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:17 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The proliferation of multimedia applications over
                 mobile, resource-constrained wireless networks has
                 raised the need for techniques that adapt these
                 applications both to clients' Quality of Service (QoS)
                 requirements and to network resource constraints. This
                 article investigates the upper-layer adaptation
                 mechanisms to achieve end-to-end delay control for
                 multimedia applications. The proposed adaptation
                 approach spans application layer, middleware layer and
                 network layer. In application layer, the requirement
                 adaptor dynamically changes the requirement levels
                 according to end-to-end delay measurement and
                 acceptable QoS requirements for the end-users. In
                 middleware layer, the priority adaptor is used to
                 dynamically adjust the service classes for applications
                 using feedback control theory. In network layer, the
                 service differentiation scheduler assigns different
                 network resources (e.g., bandwidth) to different
                 service classes. With the coordination of these three
                 layers, our approach can adaptively assign resources to
                 multimedia applications. To evaluate the impact of our
                 adaptation scheme, we built a real IEEE 802.11 ad hoc
                 network testbed. The test-bed experiments show that the
                 proposed upper-layer adaptation for end-to-end delay
                 control successfully adjusts multimedia applications to
                 meet delay requirements in many scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "End-to-end delay QoS; wireless ad hoc networks",
}

@Article{Pan:2008:CBM,
  author          = {Leon Pan and Chang N. Zhang},
  title           = {A criterion-based multilayer access control approach
                     for multimedia applications and the implementation
                     considerations},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {2},
  pages           = {17:1--17:??},
  month           = nov,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1413862.1413870},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:17 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article, a novel criterion-based multilayer
                     access control (CBMAC) approach is presented to enhance
                     existing access control models such as Role-Based,
                     Mandatory, and Discretionary Access Control models to
                     support multilayer (multilevel) access control. The
                     proposed approach is based on a set of predefined
                     security criteria which are extracted from
                     authorization rules. The security attributes of objects
                     and users are specified by security criterion
                     expressions (serving as locks) and the elements
                     (serving as keys) of security criterion subsets
                     respectively. An object embedded with a number of
                     security criterion expressions becomes a secure object
                     while a user associated with a security criterion
                     subset is called a secure user. The multilayer access
                     control is achieved by evaluating the embedded security
                     criterion expressions (actuating locks) by the elements
                     (keys) in a user's security criterion subset. The paper
                     also provides the details of integrating the proposed
                     approach with existing access control models and
                     presents the implementation considerations of
                     Criterion-Based Role-Based Multilayer Access Control,
                     the integration of CBMAC and Role-Based Access
                     Control.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Multilayer access control; secure object; secure
                     permission; secure user; security criterion},
}

@Article{Candan:2009:ISS,
  author          = {K. Sel{\c{c}}uk Candan and Alberto {Del Bimbo} and
                     Carsten Griwodz and Alejandro Jaimes},
  title           = {Introduction to the special section for the best
                     papers of {ACM Multimedia 2008}},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {18:1--18:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556135},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Cesar:2009:FTE,
  author          = {Pablo Cesar and Dick C. A. Bulterman and Jack Jansen
                     and David Geerts and Hendrik Knoche and William
                     Seager},
  title           = {Fragment, tag, enrich, and send: {Enhancing} social
                     sharing of video},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {19:1--19:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556136},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {The migration of media consumption to personal
                     computers retains distributed social viewing, but only
                     via nonsocial, strictly personal interfaces. This
                     article presents an architecture, and implementation
                     for media sharing that allows for enhanced social
                     interactions among users. Using a mixed-device model,
                     our work allows targeted, personalized enrichment of
                     content. All recipients see common content, while
                     differentiated content is delivered to individuals via
                     their personal secondary screens. We describe the
                     goals, architecture, and implementation of our system
                     in this article. In order to validate our results, we
                     also present results from two user studies involving
                     disjoint sets of test participants.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Asynchronous media sharing; differentiated content
                     enrichment; secondary screens},
}

@Article{Knoche:2009:BPS,
  author =       "Hendrik Knoche and M. Angela Sasse",
  title =        "The big picture on small screens delivering acceptable
                 video quality in mobile {TV}",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "3",
  pages =        "20:1--20:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1556134.1556137",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:39 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Mobile TV viewers can change the viewing distance and
                 (on some devices) scale the picture to their preferred
                 viewing ratio, trading off size for angular resolution.
                 We investigated optimal trade-offs between size and
                 resolution through a series of studies. Participants
                 selected their preferred size and rated the
                 acceptability of the visual experience on a 200ppi
                 device at a 4:3 aspect ratio. They preferred viewing
                 ratios similar to living room TV setups regardless of
                 the much lower resolution: at a minimum 14 pixels per
                 degree. While traveling on trains people required
                 videos with a height larger than 35mm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Mobile multimedia consumption; resolution; size;
                 trade-off",
}

@Article{Mondet:2009:CPP,
  author =       "S{\'e}bastien Mondet and Wei Cheng and G{\'e}raldine
                 Morin and Romulus Grigoras and Fr{\'e}d{\'e}ric Boudon
                 and Wei Tsang Ooi",
  title =        "Compact and progressive plant models for streaming in
                 networked virtual environments",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "3",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1556134.1556138",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:52:39 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Just as in the real world, plants are important
                 objects in virtual worlds for creating pleasant and
                 realistic environments, especially those involving
                 natural scenes. As such, much effort has been made in
                 realistic modeling of plants. As the trend moves
                 towards networked and distributed virtual environments,
                 however, the current models are inadequate as they are
                 not designed for progressive transmissions. In this
                 article, we fill in this gap by proposing a progressive
                 representation for plants based on generalized
                 cylinders. We model the shape and thickness of branches
                 in a plant as B{\'e}zier curves, group the curves
                 according to the similarity, and differentially code
                 the curves to represent the plant in a compact and
                 progressive manner. To facilitate the transmission of
                 the plants, we quantify the visual contribution of each
                 branch and use this weight in packet scheduling. We
                 show the efficiency of our representations and the
                 effectiveness of our packet scheduler through
                 experiments over a wide area network.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "multiresolution; networked virtual environment; plant
                 models; progressive coding; progressive transmission;
                 Streaming",
}

@Article{Wei:2009:CCM,
  author          = {Yong Wei and Suchendra M. Bhandarkar and Kang Li},
  title           = {Client-centered multimedia content adaptation},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {22:1--22:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556139},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {The design and implementation of a client-centered
                     multimedia content adaptation system suitable for a
                     mobile environment comprising of resource-constrained
                     handheld devices or clients is described. The primary
                     contributions of this work are: (1) the overall
                     architecture of the client-centered content adaptation
                     system, (2) a data-driven multi-level Hidden Markov
                     model (HMM)-based approach to perform both video
                     segmentation and video indexing in a single pass, and
                     (3) the formulation and implementation of a
                     Multiple-choice Multidimensional Knapsack Problem
                     (MMKP)-based video personalization strategy. In order
                     to segment and index video data, a video stream is
                     modeled at both the semantic unit level and video
                     program level. These models are learned entirely from
                     training data and no domain-dependent knowledge about
                     the structure of video programs is used. This makes the
                     system capable of handling various kinds of videos
                     without having to manually redefine the program model.
                     The proposed MMKP-based personalization strategy is
                     shown to include more relevant video content in
                     response to the client's request than the existing 0/1
                     knapsack problem and fractional knapsack problem-based
                     strategies, and is capable of satisfying multiple
                     client-side constraints simultaneously. Experimental
                     results on CNN news videos and Major League Soccer
                     (MLS) videos are presented and analyzed.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {hidden Markov models; multiple choice multidimensional
                     knapsack problem; video indexing; Video
                     personalization},
}

@Article{Sivaram:2009:DMS,
  author          = {G. S. V. S. Sivaram and Mohan S. Kankanhalli and K. R.
                     Ramakrishnan},
  title           = {Design of multimedia surveillance systems},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {23:1--23:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556140},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article addresses the problem of how to select
                     the optimal combination of sensors and how to determine
                     their optimal placement in a surveillance region in
                     order to meet the given performance requirements at a
                     minimal cost for a multimedia surveillance system. We
                     propose to solve this problem by obtaining a
                     performance vector, with its elements representing the
                     performances of subtasks, for a given input combination
                     of sensors and their placement. Then we show that the
                     optimal sensor selection problem can be converted into
                     the form of Integer Linear Programming problem (ILP) by
                     using a linear model for computing the optimal
                     performance vector corresponding to a sensor
                     combination. Optimal performance vector corresponding
                     to a sensor combination refers to the performance
                     vector corresponding to the optimal placement of a
                     sensor combination. To demonstrate the utility of our
                     technique, we design and build a surveillance system
                     consisting of PTZ (Pan-Tilt-Zoom) cameras and active
                     motion sensors for capturing faces. Finally, we show
                     experimentally that optimal placement of sensors based
                     on the design maximizes the system performance.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Performance vector; sensor selection and placement},
}

@Article{Liu:2009:SSE,
  author          = {Xiaotao Liu and Mark Corner and Prashant Shenoy},
  title           = {{\em {SEVA\/}}: {Sensor-enhanced} video annotation},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {24:1--24:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556141},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article, we study how a sensor-rich world can
                     be exploited by digital recording devices such as
                     cameras and camcorders to improve a user's ability to
                     search through a large repository of image and video
                     files. We design and implement a digital recording
                     system that records identities and locations of objects
                     (as advertised by their sensors) along with visual
                     images (as recorded by a camera). The process, which we
                     refer to as {\em Sensor-Enhanced Video Annotation
                     (SEVA)}, combines a series of correlation,
                     interpolation, and extrapolation techniques. It
                     produces a tagged stream that later can be used to
                     efficiently search for videos or frames containing
                     particular objects or people. We present detailed
                     experiments with a prototype of our system using both
                     stationary and mobile objects as well as GPS and
                     ultrasound. Our experiments show that: (i) SEVA has
                     zero error rates for static objects, except very close
                     to the boundary of the viewable area; (ii) for moving
                     objects or a moving camera, SEVA only misses objects
                     leaving or entering the viewable area by 1--2 frames;
                     (iii) SEVA can scale to 10 fast-moving objects using
                     current sensor technology; and (iv) SEVA runs online
                     using relatively inexpensive hardware.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {24},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {context-based retrieval; location-based services;
                     sensor-enhanced; Video annotation},
}

@Article{Wang:2009:MLS,
  author          = {Bing Wang and Wei Wei and Zheng Guo and Don Towsley},
  title           = {Multipath live streaming via {TCP}: {Scheme},
                     performance and benefits},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {25:1--25:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556142},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Motivated by the wide use of TCP for multimedia
                     streaming in practice and the increasing availability
                     of multipath between end hosts, we study multipath live
                     streaming via TCP in this article. We first design a
                     simple and practical TCP-based multipath streaming
                     scheme, named {\em Dynamic MPath-streaming
                     (DMP-streaming)}, which dynamically distributes packets
                     over multiple paths by {\em implicitly inferring\/} the
                     available bandwidths on these paths. To allow
                     systematic performance study, we develop an analytical
                     model for DMP-streaming and validate the model using
                     extensive {\em ns\/} simulation and Internet
                     experiments. We explore the parameter space of this
                     model and find that DMP-streaming generally provides
                     satisfactory performance when the aggregate achievable
                     TCP throughput is 1.6 times the video bitrate, when
                     allowing a few seconds of startup delay. Last, we
                     comment on the benefits of using multipath versus
                     single path for TCP-based streaming.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {multimedia streaming; Performance modeling},
}

@Article{Li:2009:PBR,
  author          = {Mingzhe Li and Mark Claypool and Robert Kinicki},
  title           = {Playout buffer and rate optimization for streaming
                     over {IEEE 802.11} wireless networks},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {3},
  pages           = {26:1--26:??},
  month           = aug,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1556134.1556143},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:39 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Most streaming rate selection and buffer optimization
                     algorithms are developed for wired networks and can
                     perform poorly over wireless networks. Wireless MAC
                     layer behavior, such as rate adaptation,
                     retransmissions, and medium sharing, can significantly
                     degrade the effectiveness of current streaming
                     algorithms. This article presents the Buffer and Rate
                     Optimization for Streaming (BROS) algorithm to improve
                     streaming performance. BROS uses a bandwidth estimation
                     tool designed specifically for wireless networks and
                     models the relationship between buffer size, streaming
                     data rate, and available bandwidth distribution. BROS
                     optimizes the streaming data rate and initial buffer
                     size, resulting in a high data rate but with few frame
                     losses and buffer underflow events, while still keeping
                     a small initial buffer delay. BROS is implemented in
                     the Emulated Streaming (EmuS) client-server system and
                     evaluated on an IEEE 802.11 wireless testbed with
                     various wireless conditions. The evaluation shows that
                     BROS can effectively optimize the streaming rate and
                     initial buffer size based on wireless network bandwidth
                     conditions, thus achieving better performance than
                     static rate or buffer selection and jitter removal
                     buffers.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Multimedia networking; playout buffer; streaming rate;
                     wireless networks},
}

@Article{Sauer:2009:MDC,
  author          = {Danielle Sauer and Yee-Hong Yang},
  title           = {Music-driven character animation},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {4},
  pages           = {27:1--27:??},
  month           = oct,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1596990.1596991},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:53:03 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Music-driven character animation extracts musical
                     features from a song and uses them to create an
                     animation. This article presents a system that builds a
                     new animation directly from musical attributes, rather
                     than simply synchronizing it to the music like similar
                     systems. Using a simple script that identifies the
                     movements involved in the performance and their timing,
                     the user can easily control the animation of
                     characters. Another unique feature of the system is its
                     ability to incorporate multiple characters into the
                     same animation, both with synchronized and
                     unsynchronized movements. A system that integrates
                     Celtic dance movements is developed in this article. An
                     evaluation of the results shows that the majority of
                     animations are found to be appealing to viewers and
                     that altering the music can change the attractiveness
                     of the final result.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Character animation; motion synthesis; music analysis;
                     primitive movements},
}

@Article{Deng:2009:SCA,
  author          = {Robert H. Deng and Yanjiang Yang},
  title           = {A study of content authentication in proxy-enabled
                     multimedia delivery systems: {Model}, techniques, and
                     applications},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {4},
  pages           = {28:1--28:??},
  month           = oct,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1596990.1596992},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:53:03 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Compared with the direct server-user approach, the
                     server-proxy-user architecture for multimedia delivery
                     promises significantly improved system scalability. The
                     introduction of the intermediary transcoding proxies
                     between content servers and end users in this
                     architecture, however, brings unprecedented challenges
                     to content security. In this article, we present a
                     systematic study on the end-to-end content
                     authentication problem in the server-proxy-user
                     context, where intermediary proxies transcode
                     multimedia content dynamically. We present a formal
                     model for the authentication problem, propose a
                     concrete construction for authenticating generic data
                     modality and formally prove its security. We then apply
                     the generic construction to authenticating specific
                     multimedia formats, for example, JPEG2000 code-streams
                     and MPEG-4 video streams. The prototype implementation
                     shows that our scheme is suitable for practical
                     applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {end-to-end authentication; Multimedia content
                     delivery; security},
}

@Article{Cha:2009:TVS,
  author          = {Jongeun Cha and Mohamad Eid and Abdulmotaleb {El
                     Saddik}},
  title           = {Touchable {$3$D} video system},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {4},
  pages           = {29:1--29:??},
  month           = oct,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1596990.1596993},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:53:03 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Multimedia technologies are reaching the limits of
                     providing audio-visual media that viewers consume
                     passively. An important factor, which will ultimately
                     enhance the user's experience in terms of
                     impressiveness and immersion, is interaction. Among
                     daily life interactions, haptic interaction plays a
                     prominent role in enhancing the quality of experience
                     of users, and in promoting physical and emotional
                     development. Therefore, a critical step in multimedia
                     research is expected to bring the sense of touch, or
                     haptics, into multimedia systems and applications. This
                     article proposes a touchable 3D video system where
                     viewers can actively touch a video scene through a
                     force-feedback device, and presents the underlying
                     technologies in three functional components: (1)
                     contents generation, (2) contents transmission, and (3)
                     viewing and interaction. First of all, we introduce a
                     depth image-based haptic representation (DIBHR) method
                     that adds haptic and heightmap images, in addition to
                     the traditional depth image-based representation
                     (DIBR), to encode the haptic surface properties of the
                     video media. In this representation, the haptic image
                     contains the stiffness, static friction, and dynamic
                     friction, whereas the heightmap image contains
                     roughness of the video contents. Based on this
                     representation method, we discuss how to generate
                     synthetic and natural (real) video media through a 3D
                     modeling tool and a depth camera, respectively. Next,
                     we introduce a transmission mechanism based on the
                     MPEG-4 framework where new MPEG-4 BIFS nodes are
                     designed to describe the haptic scene. Finally, a
                     haptic rendering algorithm to compute the interaction
                     force between the scene and the viewer is described. As
                     a result, the performance of the haptic rendering
                     algorithm is evaluated in terms of computational time
                     and smooth contact force. It operates marginally within
                     a 1 kHz update rate that is required to provide stable
                     interaction force and provide smoother contact force
                     with the depth image that has high frequency
                     geometrical noise using a median filter.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {haptic rendering algorithm; Haptic surface properties;
                     video representation},
}

@article{Benevenuto:2009:VIO,
  author =       {Fabr{\'\i}cio Benevenuto and Tiago Rodrigues and
                 Virgilio Almeida and Jussara Almeida and Keith Ross},
  title =        {Video interactions in online video social networks},
  journal =      j-TOMCCAP,
  volume =       {5},
  number =       {4},
  pages =        {30:1--30:??},
  month =        oct,
  year =         {2009},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1596990.1596994},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:03 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {This article characterizes video-based interactions
                 that emerge from YouTube's video response feature,
                 which allows users to discuss themes and to provide
                 reviews for products or places using much richer media
                 than text. Based on crawled data covering a
                 representative subset of videos and users, we present a
                 characterization from two perspectives: the video
                 response view and the interaction network view. In
                 addition to providing valuable statistical models for
                 various characteristics, our study uncovers typical
                 user behavioral patterns in video-based environments
                 and shows evidence of opportunistic behavior.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {30},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {opportunistic behavior; promotion; social media;
                 social networks; video communication; Video
                 interactions; video spam; YouTube},
}

@article{Erdmann:2009:IEB,
  author =       {Maike Erdmann and Kotaro Nakayama and Takahiro Hara
                 and Shojiro Nishio},
  title =        {Improving the extraction of bilingual terminology from
                 {Wikipedia}},
  journal =      j-TOMCCAP,
  volume =       {5},
  number =       {4},
  pages =        {31:1--31:??},
  month =        oct,
  year =         {2009},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1596990.1596995},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:03 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Research on the automatic construction of bilingual
                 dictionaries has achieved impressive results. Bilingual
                 dictionaries are usually constructed from parallel
                 corpora, but since these corpora are available only for
                 selected text domains and language pairs, the potential
                 of other resources is being explored as well.\par

                 In this article, we want to further pursue the idea of
                 using Wikipedia as a corpus for bilingual terminology
                 extraction. We propose a method that extracts
                 term-translation pairs from different types of
                 Wikipedia link information. After that, an SVM
                 classifier trained on the features of manually labeled
                 training data determines the correctness of unseen
                 term-translation pairs.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {31},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {Bilingual dictionary; link analysis; Wikipedia
                 mining},
}

@article{Carlsson:2010:SSL,
  author =       {Niklas Carlsson and Derek L. Eager},
  title =        {Server selection in large-scale video-on-demand
                 systems},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {1},
  pages =        {1:1--1:??},
  month =        feb,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671954.1671955},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:23 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Video on demand, particularly with user-generated
                 content, is emerging as one of the most
                 bandwidth-intensive applications on the Internet. Owing
                 to content control and other issues, some
                 video-on-demand systems attempt to prevent downloading
                 and peer-to-peer content delivery. Instead, such
                 systems rely on server replication, such as via
                 third-party content distribution networks, to support
                 video streaming (or pseudostreaming) to their clients.
                 A major issue with such systems is the cost of the
                 required server resources.\par

                 By synchronizing the video streams for clients that
                 make closely spaced requests for the same video from
                 the same server, server costs (such as for retrieval of
                 the video data from disk) can be amortized over
                 multiple requests. A fundamental trade-off then arises,
                 however, with respect to server selection. Network
                 delivery cost is minimized by selecting the {\em
                 nearest\/} server, while server cost is minimized by
                 directing closely spaced requests for the same video to
                 a {\em common\/} server.\par

                 This article compares classes of server selection
                 policies within the context of a simple system model.
                 We conclude that: (i) server selection using dynamic
                 system state information (rather than only proximities
                 and average loads) can yield large improvements in
                 performance, (ii) deferring server selection for a
                 request as late as possible (i.e., until just before
                 streaming is to begin) can yield additional large
                 improvements, and (iii) within the class of policies
                 using dynamic state information and deferred selection,
                 policies using only ``local'' (rather than global)
                 request information are able to achieve most of the
                 potential performance gains.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {1},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {content distribution networks; modeling; Performance
                 analysis; server selection; video-on-demand},
}

%%% The complexity bounds at the end of the abstract are set in math mode
%%% so that the variables and exponent are typeset consistently.
@Article{Agarwal:2010:BRW,
  author =       "Parag Agarwal and Balakrishnan Prabhakaran",
  title =        "Blind robust watermarking of {$3$D} motion data",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671956",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The article addresses the problem of copyright
                 protection for 3D motion-captured data by designing a
                 robust blind watermarking mechanism. The mechanism
                 segments motion capture data and identifies clusters of
                 3D points per segment. A watermark can be embedded and
                 extracted within these clusters by using a proposed
                 extension of 3D quantization index modulation. The
                 watermarking scheme is blind in nature and the encoded
                 watermarks are shown to be imperceptible, and secure.
                 The resulting hiding capacity has bounds based on
                 cluster size. The watermarks are shown to be robust
                 against attacks such as uniform affine transformations
                 (scaling, rotation, and translation), cropping,
                 reordering, and noise addition. The time complexity for
                 watermark embedding and extraction is estimated as
                 $O(n \log n)$ and $O(n^2 \log n)$, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "blind; decoding; encoding; spatial; Watermarking",
}

@article{Yang:2010:DMD,
  author =       {Bo Yang},
  title =        {{DSI}: a model for distributed multimedia semantic
                 indexing and content integration},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {1},
  pages =        {3:1--3:??},
  month =        feb,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671954.1671957},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:23 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Considerable research has been done on the
                 content-based multimedia delivery and access in
                 distributed data repositories. As noted in the
                 literature, there is always a trade-off between
                 multimedia quality and access speed. In addition, the
                 overall performance is greatly determined by the
                 distribution of the multimedia data. In this article,
                 an unsupervised multimedia semantic integration
                 approach for a distributed infrastructure, the
                 Distributed Semantic Indexing (DSI), is presented that
                 addresses both the data quality and search performance.
                 With the ability of summarizing content information and
                 guiding data distribution, the proposed approach is
                 distinguished by: (1) logic-based representation and
                 concise abstraction of the semantic contents of
                 multimedia data, which are further integrated to form a
                 general overview of a multimedia data repository ---
                 content signature; (2) application of linguistic
                 relationships to construct a hierarchical metadata
                 based on the content signatures allowing imprecise
                 queries; and (3) achieving the optimal performance in
                 terms of search cost. The fundamental structure of the
                 proposed model is presented. The proposed scheme has
                 been simulated and the simulation results are analyzed
                 and compared against several other approaches that have
                 been advocated in the literature.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {3},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {distributed indexing; image retrieval; Semantic
                 representation},
}

@article{Nystrom:2010:ECO,
  author =       {Marcus Nystr{\"o}m and Kenneth Holmqvist},
  title =        {Effect of compressed offline foveated video on viewing
                 behavior and subjective quality},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {1},
  pages =        {4:1--4:??},
  month =        feb,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671954.1671958},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:23 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Offline foveation is a technique to improve the
                 compression efficiency of digitized video. The general
                 idea behind offline foveation is to blur video regions
                 where no or a small number of previewers look without
                 decreasing the subjective quality for later viewers. It
                 relies on the fact that peripheral vision is reduced
                 compared to central vision, and the observation that
                 during free-viewing humans' gaze positions generally
                 coincide when watching video. In this article, we
                 conduct two experiments to assess how offline foveation
                 affects viewing behavior and subjective quality. In the
                 first experiment, 15 subjects free-viewed six video
                 clips before and after offline foveation whereas in the
                 second experiment we had 17 subjects assessing the
                 quality of these videos after one, two, and three
                 consecutive viewings. Eye movements were measured
                 during the experiments. Results showed that, although
                 offline foveation prior to encoding with H.264 yielded
                 data reductions up to 52\% (20\% average) on the tested
                 videos, it had little or no effect on where people
                 looked, their intersubject dispersion, fixation
                 duration, saccade amplitude, or the experienced quality
                 during first-time viewing. However, seeing the videos
                 more than once increased the intersubject dispersion
                 and decreased the subjective quality. In view of these
                 results, we discuss the usage of offline foveated video
                 in practical applications.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {4},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {Eye-tracking; foveation; subjective quality; video
                 compression},
}

%%% The codec keyword is spelled ``H.264/AVC'' (with a period), matching
%%% the ``H.264'' used in this entry's title and abstract.
@Article{Ivanov:2010:RTH,
  author =       "Yuri V. Ivanov and C. J. Bleakley",
  title =        "Real-time {H.264} video encoding in software with fast
                 mode decision and dynamic complexity control",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671959",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel real-time algorithm for
                 reducing and dynamically controlling the computational
                 complexity of an H.264 video encoder implemented in
                 software. A fast mode decision algorithm, based on a
                 Pareto-optimal macroblock classification scheme, is
                 combined with a dynamic complexity control algorithm
                 that adjusts the MB class decisions such that a
                 constant frame rate is achieved. The average coding
                 efficiency of the proposed algorithm was found to be
                 similar to that of conventional encoding operating at
                 half the frame rate. The proposed algorithm was found
                 to provide lower average bitrate and distortion than
                 static complexity scaling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "complexity; complexity control; fast mode decision;
                 H.264/AVC; mode decision; rate distortion; real time",
}

@article{Hefeeda:2010:ASM,
  author =       {Mohamed Hefeeda and Kianoosh Mokhtarian},
  title =        {Authentication schemes for multimedia streams:
                 {Quantitative} analysis and comparison},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {1},
  pages =        {6:1--6:??},
  month =        feb,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671954.1671960},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Tue Mar 16 18:53:23 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {With the rapid increase in the demand for multimedia
                 services, securing the delivery of multimedia content
                 has become an important issue. Accordingly, the problem
                 of multimedia stream authentication has received
                 considerable attention by previous research and various
                 solutions have been proposed. However, these solutions
                 have not been rigorously analyzed and contrasted to
                 each other, and thus their relative suitability for
                 different streaming environments is not clear. This
                 article presents comprehensive analysis and comparison
                 among different schemes proposed in the literature to
                 authenticate multimedia streams. Authentication schemes
                 for nonscalable and scalable multimedia streams are
                 analyzed. To conduct this analysis, we define five
                 important performance metrics, which are computation
                 cost, communication overhead, receiver buffer size,
                 delay, and tolerance to packet losses. We derive
                 analytic formulas for these metrics for all considered
                 authentication schemes to numerically analyze their
                 performance. In addition, we implement all schemes in a
                 simulator to study and compare their performance in
                 different environments. The parameters for the
                 simulator are carefully chosen to mimic realistic
                 settings. We draw several conclusions on the advantages
                 and disadvantages of each scheme. We extend our
                 analysis to authentication techniques for scalable
                 streams. We pay careful attention to the flexibility of
                 scalable streams and analyze its impacts on the
                 authentication schemes. Our analysis and comparison
                 reveal the merits and shortcomings of each scheme,
                 provide guidelines on choosing the most appropriate
                 scheme for a given multimedia streaming application,
                 and could stimulate designing new authentication
                 schemes or improving existing ones. For example, our
                 detailed analysis has led us to design a new
                 authentication scheme that combines the best features
                 of two previous schemes.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {6},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {authentication schemes; Multimedia authentication;
                 multimedia security; multimedia streaming; scalable
                 coding; secure streaming},
}

@article{Yang:2010:EMP,
  author =       {Zhenyu Yang and Wanmin Wu and Klara Nahrstedt and
                 Gregorij Kurillo and Ruzena Bajcsy},
  title =        {Enabling multi-party {$3$D} tele-immersive
                 environments with {{\em ViewCast}}},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {2},
  pages =        {7:1--7:??},
  month =        mar,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671962.1671963},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Sat Aug 14 17:17:15 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Three-dimensional tele-immersive (3DTI) environments
                 have great potential to promote collaborative work
                 among geographically distributed users. However, most
                 existing 3DTI systems only work with two sites due to
                 the huge demand of resources and the lack of a simple
                 yet powerful networking model to handle connectivity,
                 scalability, and quality-of-service (QoS)
                 guarantees.\par

                 In this article, we explore the design space from the
                 angle of multi-stream management to enable multi-party
                 3DTI communication. Multiple correlated 3D video
                 streams are employed to provide a comprehensive
                 representation of the physical scene in each 3DTI
                 environment, and are rendered together to establish a
                 common cyberspace among all participating 3DTI
                 environments. The existence of multi-stream correlation
                 provides the unique opportunity for new approaches in
                 QoS provisioning. Previous work mostly concentrated on
                 compression and adaptation techniques on the per-stream
                 basis while ignoring the application layer semantics
                 and the coordination required among streams. We propose
                 an innovative and generalized {\em ViewCast\/} model to
                 coordinate the multi-stream content dissemination over
                 an overlay network. ViewCast leverages view semantics
                 in 3D free-viewpoint video systems to fill the gap
                 between high-level user interest and low-level stream
                 management. In ViewCast, only the view information is
                 specified by the user/application, while the underlying
                 control dynamically performs stream differentiation,
                 selection, coordination, and dissemination. We present
                 the details of ViewCast and evaluate it through both
                 simulation and 3DTI sessions among tele-immersive
                 environments residing in different institutes across
                 the Internet2. Our experimental results demonstrate the
                 implementation feasibility and performance enhancement
                 of ViewCast in supporting multi-party 3DTI
                 collaboration.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {7},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {3D tele-immersion; application level multicast;
                 distributed multimedia system; multi-stream
                 coordination; networking protocol; QoS adaptation},
}

@article{Wu:2010:ELT,
  author =       {Junwen Wu and Mohan M. Trivedi},
  title =        {An eye localization, tracking and blink pattern
                 recognition system: {Algorithm} and evaluation},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {2},
  pages =        {8:1--8:??},
  month =        mar,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671962.1671964},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Sat Aug 14 17:17:15 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {This study is to investigate the fundamental problems
                 of, (1) facial feature detection and localization,
                 especially eye features; and (2) eye dynamics,
                 including tracking and blink detection. We first
                 describe our contribution to eye localization.
                 Following that, we discuss a simultaneous eye tracking
                 and blink detection system. Facial feature detection is
                 solved in a general object detection framework and its
                 performance for eye localization is presented. A binary
                 tree representation based on feature dependency
                 partitions the object feature space in a coarse to fine
                 manner. In each compact feature subspace, independent
                 component analysis (ICA) is used to get the independent
                 sources, whose probability density functions (PDFs) are
                 modeled by Gaussian mixtures. When applying this
                 representation for the task of eye detection, a
                 subwindow is used to scan the entire image and each
                 obtained image patch is examined using Bayesian
                 criteria to determine the presence of an eye subject.
                 After the eyes are automatically located with binary
                 tree-based probability learning, interactive particle
                 filters are used for simultaneously tracking the eyes
                 and detecting the blinks. The particle filters use
                 classification-based observation models, in which the
                 posterior probabilities are evaluated by logistic
                 regressions in tensor subspaces. Extensive experiments
                 are used to evaluate the performance from two aspects,
                 (1) blink detection rate and the accuracy of blink
                 duration in terms of the frame numbers; (2) eye
                 tracking accuracy. We also present an experimental
                 setup for obtaining the benchmark data in tracking
                 accuracy evaluation. The experimental evaluation
                 demonstrates the capability of this approach.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {8},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {Eye blink detection; human computer interface;
                 particle filtering; video processing},
}

@article{Jin:2010:DMN,
  author =       {Xing Jin and S.-H. Gary Chan},
  title =        {Detecting malicious nodes in peer-to-peer streaming by
                 peer-based monitoring},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {2},
  pages =        {9:1--9:??},
  month =        mar,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671962.1671965},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Sat Aug 14 17:17:15 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {Current peer-to-peer (P2P) streaming systems often
                 assume that nodes cooperate to upload and download
                 data. However, in the open environment of the Internet,
                 this is not necessarily true and there exist malicious
                 nodes in the system. In this article, we study
                 malicious actions of nodes that can be detected through
                 peer-based monitoring. We require each node to monitor
                 the data received and to periodically send monitoring
                 messages about its neighbors to some trustworthy nodes.
                 To efficiently store and search messages among multiple
                 trustworthy nodes, we organize trustworthy nodes into a
                 threaded binary tree. Trustworthy nodes also
                 dynamically redistribute monitoring messages among
                 themselves to achieve load balancing. Our simulation
                 results show that this scheme can efficiently detect
                 malicious nodes with high accuracy, and that the
                 dynamic redistribution method can achieve good load
                 balancing among trustworthy nodes.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {9},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {Malicious nodes; peer monitoring; peer-to-peer
                 streaming},
}

@article{Chiu:2010:FMH,
  author =       {Chih-Yi Chiu and Hsin-Min Wang and Chu-Song Chen},
  title =        {Fast min-hashing indexing and robust spatio-temporal
                 matching for detecting video copies},
  journal =      j-TOMCCAP,
  volume =       {6},
  number =       {2},
  pages =        {10:1--10:??},
  month =        mar,
  year =         {2010},
  coden =        {????},
  doi =          {https://doi.org/10.1145/1671962.1671966},
  issn =         {1551-6857 (print), 1551-6865 (electronic)},
  bibdate =      {Sat Aug 14 17:17:15 MDT 2010},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract =     {The increase in the number of video copies, both legal
                 and illegal, has become a major problem in the
                 multimedia and Internet era. In this article, we
                 propose a novel method for detecting various video
                 copies in a video sequence. To achieve fast and robust
                 detection, the method fully integrates several
                 components, namely the min-hashing signature to
                 compactly represent a video sequence, a spatio-temporal
                 matching scheme to accurately evaluate video similarity
                 compiled from the spatial and temporal aspects, and
                 some speedup techniques to expedite both min-hashing
                 indexing and spatio-temporal matching. The results of
                 experiments demonstrate that, compared to several
                 baseline methods with different feature descriptors and
                 matching schemes, the proposed method which combines
                 both global and local feature descriptors yields the
                 best performance when encountering a variety of video
                 transformations. The method is very fast, requiring
                 approximately 0.06 seconds to search for copies of a
                 thirty-second video clip in a six-hour video
                 sequence.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno =    {10},
  fjournal =     {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords =     {Content-based copy detection; histogram pruning;
                 near-duplicate},
}

@Article{Sarhan:2010:WTP,
  author          = {Nabil J. Sarhan and Mohammad A. Alsmirat and Musab
                     Al-Hadrusi},
  title           = {Waiting-time prediction in scalable on-demand video
                     streaming},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {2},
  articleno       = {11},
  pages           = {11:1--11:??},
  month           = mar,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1671962.1671967},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  abstract        = {Providing video streaming users with expected waiting
                     times enhances their perceived quality-of-service (QoS)
                     and encourages them to wait. In the absence of any
                     waiting-time feedback, users are more likely to defect
                     because of the uncertainty as to when their services
                     will start. We analyze waiting-time predictability in
                     scalable video streaming. We propose two prediction
                     schemes and study their effectiveness when applied with
                     various stream merging techniques and scheduling
                     policies. The results demonstrate that the waiting time
                     can be predicted accurately, especially when enhanced
                     cost-based scheduling is applied. The combination of
                     waiting-time prediction and cost-based scheduling leads
                     to outstanding performance benefits.},
  keywords        = {Scheduling; stream merging; time-of-service
                     guarantees; video streaming; waiting-time prediction},
  bibdate         = {Sat Aug 14 17:17:15 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xu:2010:IBP,
  author          = {Changsheng Xu and Eckehard Steinbach and Abdulmotaleb
                     {El Saddik} and Michelle Zhou},
  title           = {Introduction to the best papers of {ACM Multimedia
                     2009}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {12},
  pages           = {12:1--12:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1830482},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zha:2010:VQS,
  author          = {Zheng-Jun Zha and Linjun Yang and Tao Mei and Meng
                     Wang and Zengfu Wang and Tat-Seng Chua and Xian-Sheng
                     Hua},
  title           = {Visual query suggestion: {Towards} capturing user
                     intent in {Internet} image search},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {13},
  pages           = {13:1--13:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823747},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Jiang:2010:AVA,
  author          = {Wei Jiang and Courtenay Cotton and Shih-Fu Chang and
                     Dan Ellis and Alexander C. Loui},
  title           = {Audio-visual atoms for generic video concept
                     classification},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {14},
  pages           = {14:1--14:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823748},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{DeOliveira:2010:LND,
  author          = {Rodrigo {De Oliveira} and Mauro Cherubini and Nuria
                     Oliver},
  title           = {Looking at near-duplicate videos from a human-centric
                     perspective},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {15},
  pages           = {15:1--15:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823749},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yin:2010:LEC,
  author          = {Hao Yin and Xuening Liu and Tongyu Zhan and Vyas Sekar
                     and Feng Qiu and Chuang Lin and Hui Zhang and Bo Li},
  title           = {{LiveSky}: {Enhancing} {CDN} with {P2P}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {16},
  pages           = {16:1--16:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823750},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Money:2010:EEL,
  author          = {Arthur G. Money and Harry Agius},
  title           = {{ELVIS}: {Entertainment-Led VIdeo Summaries}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {17},
  pages           = {17:1--17:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823751},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hoi:2010:SSD,
  author =       "Steven C. H. Hoi and Wei Liu and Shih-Fu Chang",
  title =        "Semi-supervised distance metric learning for
                 collaborative image retrieval and clustering",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "18:1--18:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823752",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Maddage:2010:WLA,
  author          = {Namunu C. Maddage and Khe Chai Sim and Haizhou Li},
  title           = {Word level automatic alignment of music and lyrics
                     using vocal synthesis},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {19},
  pages           = {19:1--19:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823753},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Qudah:2010:EDD,
  author          = {Bashar Qudah and Nabil J. Sarhan},
  title           = {Efficient delivery of on-demand video streams to
                     heterogeneous receivers},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {20},
  pages           = {20:1--20:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1823754},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gomes:2010:STA,
  author =       "Jo{\~a}o V. P. Gomes and Pedro R. M. In{\'a}cio and
                 Branka Lakic and M{\'a}rio M. Freire and Henrique J. A.
                 {Da Silva} and Paulo P. Monteiro",
  title =        "Source traffic analysis",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823755",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Boll:2010:CPA,
  author          = {Susanne Boll and Jiebo Luo and Ramesh Jain and Dong
                     Xu},
  title           = {Call for papers: {ACM Transactions on Multimedia
                     Computing, Communications and Applications} special
                     issue on social media},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {3},
  articleno       = {22},
  pages           = {22:1--22:??},
  month           = aug,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1823746.1837254},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Steinmetz:2010:OOD,
  author          = {Ralf Steinmetz},
  title           = {Obituary to our dear friend {Professor Dr. Nicolas D.
                     Georganas, PhD}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {23},
  pages           = {23:1--23:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865107},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Haenselmann:2010:FSI,
  author          = {Thomas Haenselmann},
  title           = {Foreword to the special issue on multimedia sensor
                     fusion},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {24},
  pages           = {24:1--24:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865108},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2010:MBA,
  author          = {Xiangyu Wang and Mohan Kankanhalli},
  title           = {{MultiFusion}: a boosting approach for multimedia
                     fusion},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {25},
  pages           = {25:1--25:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865109},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chetty:2010:MSF,
  author          = {Girija Chetty and Matthew White},
  title           = {Multimedia sensor fusion for retrieving identity in
                     biometric access control systems},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {26},
  pages           = {26:1--26:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865110},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Friedland:2010:DAS,
  author          = {Gerald Friedland and Chuohao Yeo and Hayley Hung},
  title           = {Dialocalization: {Acoustic} speaker diarization and
                     visual localization as joint optimization problem},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {27},
  pages           = {27:1--27:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865111},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Rahman:2010:SGA,
  author          = {Abu Saleh Md Mahfujur Rahman and M. Anwar Hossain and
                     Abdulmotaleb {El Saddik}},
  title           = {Spatial-geometric approach to physical mobile
                     interaction based on accelerometer and {IR} sensory
                     data fusion},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {28},
  pages           = {28:1--28:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865112},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yang:2010:EMT,
  author          = {Zhenyu Yang and Wanmin Wu and Klara Nahrstedt and
                     Gregorij Kurillo and Ruzena Bajcsy},
  title           = {Enabling multiparty {$3$D} tele-immersive environments
                     with {ViewCast}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {29},
  pages           = {29:1--29:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865113},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Marshall:2010:OCM,
  author =       "Damien Marshall and S{\'e}amus McLoone and Tom{\'a}s
                 Ward",
  title =        "Optimizing consistency by maximizing bandwidth usage
                 in distributed interactive applications",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "30:1--30:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865114",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Vu:2010:UOC,
  author          = {Long Vu and Indranil Gupta and Klara Nahrstedt and Jin
                     Liang},
  title           = {Understanding overlay characteristics of a large-scale
                     peer-to-peer {IPTV} system},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {6},
  number          = {4},
  articleno       = {31},
  pages           = {31:1--31:??},
  month           = nov,
  year            = {2010},
  DOI             = {https://doi.org/10.1145/1865106.1865115},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Tue Nov 23 10:03:16 MST 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Meyer:2011:MRL,
  author          = {Marek Meyer and Christoph Rensing and Ralf Steinmetz},
  title           = {Multigranularity reuse of learning resources},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870122},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bouyakoub:2011:SBI,
  author          = {Samia Bouyakoub and Abdelkader Belkhir},
  title           = {{SMIL} builder: an incremental authoring tool for
                     {SMIL Documents}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870123},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hossain:2011:MAQ,
  author          = {M. Anwar Hossain and Pradeep K. Atrey and Abdulmotaleb
                     {El Saddik}},
  title           = {Modeling and assessing quality of information in
                     multisensor multimedia monitoring systems},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870124},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhu:2011:NDK,
  author          = {Jianke Zhu and Steven C. H. Hoi and Michael R. Lyu and
                     Shuicheng Yan},
  title           = {Near-duplicate keyframe retrieval by semi-supervised
                     learning and nonrigid image matching},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870125},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hsu:2011:FCL,
  author          = {Cheng-Hsin Hsu and Mohamed Hefeeda},
  title           = {A framework for cross-layer optimization of video
                     streaming in wireless networks},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870126},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chandra:2011:EAS,
  author          = {Surendar Chandra and Xuwen Yu},
  title           = {An empirical analysis of serendipitous media sharing
                     among campus-wide wireless users},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:??},
  month           = jan,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1870121.1870127},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:41 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gopinathan:2011:OLM,
  author          = {Ajay Gopinathan and Zongpeng Li},
  title           = {Optimal layered multicast},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {7},
  pages           = {7:1--7:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925102},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hsu:2011:USS,
  author          = {Cheng-Hsin Hsu and Mohamed Hefeeda},
  title           = {Using simulcast and scalable video coding to
                     efficiently control channel switching delay in mobile
                     {TV} broadcast networks},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {8},
  pages           = {8:1--8:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925103},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Jin:2011:KDH,
  author          = {Yohan Jin and Balakrishnan Prabhakaran},
  title           = {Knowledge discovery from {$3$D} human motion streams
                     through semantic dimensional reduction},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {9},
  pages           = {9:1--9:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925104},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cheng:2011:MPM,
  author          = {Wei Cheng and Wei Tsang Ooi and Sebastien Mondet and
                     Romulus Grigoras and G{\'e}raldine Morin},
  title           = {Modeling progressive mesh streaming: {Does} data
                     dependency matter?},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {10},
  pages           = {10:1--10:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925105},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bagchi:2011:FAD,
  author          = {Susmit Bagchi},
  title           = {A fuzzy algorithm for dynamically adaptive multimedia
                     streaming},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {11},
  pages           = {11:1--11:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925106},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hsu:2011:SMV,
  author          = {Cheng-Hsin Hsu and Mohamed Hefeeda},
  title           = {Statistical multiplexing of variable-bit-rate videos
                     streamed to mobile devices},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {2},
  articleno       = {12},
  pages           = {12:1--12:??},
  month           = feb,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/1925101.1925107},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Wed Mar 16 09:25:42 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Steinmetz:2011:EN,
  author          = {Ralf Steinmetz},
  title           = {Editorial notice},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {13},
  pages           = {13:1--13:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000487},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Korshunov:2011:VQF,
  author          = {Pavel Korshunov and Wei Tsang Ooi},
  title           = {Video quality for face detection, recognition, and
                     tracking},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {14},
  pages           = {14:1--14:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000488},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2011:PCI,
  author          = {Pei-Yu Lin and Jung-San Lee and Chin-Chen Chang},
  title           = {Protecting the content integrity of digital imagery
                     with fidelity preservation},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {15},
  pages           = {15:1--15:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000489},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{VanLeuken:2011:SVO,
  author          = {Reinier H. {Van Leuken} and Remco C. Veltkamp},
  title           = {Selecting vantage objects for similarity indexing},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {16},
  pages           = {16:1--16:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000490},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Feng:2011:SRI,
  author          = {Wu-Chi Feng and Thanh Dang and John Kassebaum and Tim
                     Bauman},
  title           = {Supporting region-of-interest cropping through
                     constrained compression},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {17},
  pages           = {17:1--17:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000491},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Liu:2011:DBA,
  author          = {Qingzhong Liu and Andrew H. Sung and Mengyu Qiao},
  title           = {Derivative-based audio steganalysis},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {3},
  articleno       = {18},
  pages           = {18:1--18:??},
  month           = aug,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2000486.2000492},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  CODEN           = {????},
  bibdate         = {Mon Sep 5 17:00:22 MDT 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2011:GDO,
  author =       "Frederick W. B. Li and Rynson W. H. Lau and Danny
                 Kilis and Lewis W. F. Li",
  title =        "Game-on-demand: an online game engine based on
                 geometry streaming",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "19:1--19:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000493",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Shirmohammadi:2011:IAM,
  author          = {Shervin Shirmohammadi and Jiebo Luo and Jie Yang and
                     Abdulmotaleb {El Saddik}},
  title           = {Introduction to {ACM Multimedia 2010} best paper
                     candidates},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {20},
  pages           = {20:1--20:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037677},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bhattacharya:2011:HAA,
  author          = {Subhabrata Bhattacharya and Rahul Sukthankar and
                     Mubarak Shah},
  title           = {A holistic approach to aesthetic enhancement of
                     photographs},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {21},
  pages           = {21:1--21:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037678},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Tan:2011:URS,
  author          = {Shulong Tan and Jiajun Bu and Chun Chen and Bin Xu and
                     Can Wang and Xiaofei He},
  title           = {Using rich social media information for music
                     recommendation via hypergraph model},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {22},
  pages           = {22:1--22:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037679},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Milani:2011:CAE,
  author          = {Simone Milani and Giancarlo Calvagno},
  title           = {A cognitive approach for effective coding and
                     transmission of {$3$D} video},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {23},
  pages           = {23:1--23:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037680},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hong:2011:VAE,
  author          = {Richang Hong and Meng Wang and Xiao-Tong Yuan and
                     Mengdi Xu and Jianguo Jiang and Shuicheng Yan and
                     Tat-Seng Chua},
  title           = {Video accessibility enhancement for hearing-impaired
                     users},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {24},
  pages           = {24:1--24:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037681},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Boll:2011:ISI,
  author          = {Susanne Boll and Ramesh Jain and Jiebo Luo and Dong
                     Xu},
  title           = {Introduction to special issue on social media},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {25},
  pages           = {25:1--25:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037682},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2011:EOM,
  author          = {Yu-Ching Lin and Yi-Hsuan Yang and Homer H. Chen},
  title           = {Exploiting online music tags for music emotion
                     classification},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {26},
  pages           = {26:1--26:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037683},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Rabbath:2011:ACP,
  author          = {Mohamad Rabbath and Philipp Sandhaus and Susanne
                     Boll},
  title           = {Automatic creation of photo books from stories in
                     social media},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {27},
  pages           = {27:1--27:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037684},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hu:2011:RAI,
  author          = {Weiming Hu and Haiqiang Zuo and Ou Wu and Yunfei Chen
                     and Zhongfei Zhang and David Suter},
  title           = {Recognition of adult images, videos, and web page
                     bags},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {28},
  pages           = {28:1--28:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037685},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2011:SSC,
  author =       "Yu-Ru Lin and K. Sel{\c{c}}uk Candan and Hari
                 Sundaram and Lexing Xie",
  title =        "{SCENT}: {Scalable} compressed monitoring of evolving
                 multirelational social networks",
  journal =      j-TOMCCAP,
  volume =       "7S",
  number =       "1",
  pages =        "29:1--29:??",
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2037676.2037686",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun Nov 6 06:36:59 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Sang:2011:BCT,
  author          = {Jitao Sang and Changsheng Xu},
  title           = {Browse by chunks: {Topic} mining and organizing on
                     web-scale social media},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {30},
  pages           = {30:1--30:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037687},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ji:2011:MFL,
  author          = {Rongrong Ji and Yue Gao and Bineng Zhong and Hongxun
                     Yao and Qi Tian},
  title           = {Mining {\tt flickr} landmarks by modeling
                     reconstruction sparsity},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {31},
  pages           = {31:1--31:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037688},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Mandel:2011:CTI,
  author          = {Michael I. Mandel and Razvan Pascanu and Douglas Eck
                     and Yoshua Bengio and Luca M. Aiello and Rossano
                     Schifanella and Filippo Menczer},
  title           = {Contextual tag inference},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {32},
  pages           = {32:1--32:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037689},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Biel:2011:VCB,
  author          = {Joan-Isaac Biel and Daniel Gatica-Perez},
  title           = {{VlogSense}: {Conversational} behavior and social
                     attention in {YouTube}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7S},
  number          = {1},
  articleno       = {33},
  pages           = {33:1--33:??},
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2037676.2037690},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Sun Nov 6 06:36:59 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:2011:TCO,
  author          = {Anonymous},
  title           = {Table of Contents: Online Supplement Volume {7S},
                     Number 1},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {34},
  pages           = {34:1--34:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043620},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hong:2011:BSE,
  author          = {Richang Hong and Jinhui Tang and Hung-Khoon Tan and
                     Chong-Wah Ngo and Shuicheng Yan and Tat-Seng Chua},
  title           = {Beyond search: Event-driven summarization for {Web}
                     videos},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {35},
  pages           = {35:1--35:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043613},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Kuo:2011:TPQ,
  author          = {Wen-Kuang Kuo and Kuo-Wei Wu},
  title           = {Traffic prediction and {QoS} transmission of real-time
                     live {VBR} videos in {WLANs}},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {36},
  pages           = {36:1--36:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043614},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Maddage:2011:BSS,
  author          = {Namunu C. Maddage and Haizhou Li},
  title           = {Beat space segmentation and octave scale cepstral
                     feature for sung language recognition in pop music},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {37},
  pages           = {37:1--37:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043615},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Santini:2011:ECQ,
  author          = {Simone Santini},
  title           = {Efficient computation of queries on feature streams},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {38},
  pages           = {38:1--38:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043616},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Verdugo:2011:IFC,
  author =       "Renato Verdugo and Miguel Nussbaum and Pablo Corro and
                 Pablo Nu{\~n}ez and Paula Navarrete",
  title =        "Interactive films and coconstruction",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "4",
  pages =        "39:1--39:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043612.2043617",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Dec 15 08:53:32 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ghandeharizadeh:2011:DCC,
  author          = {Shahram Ghandeharizadeh and Shahin Shayandeh},
  title           = {Domical cooperative caching for streaming media in
                     wireless home networks},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {40},
  pages           = {40:1--40:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043618},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ghandeharizadeh:2011:CPS,
  author          = {Shahram Ghandeharizadeh and Shahin Shayandeh},
  title           = {Call for papers: Special issue on {$3$D} mobile
                     multimedia},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {7},
  number          = {4},
  articleno       = {41},
  pages           = {41:1--41:??},
  month           = nov,
  year            = {2011},
  DOI             = {https://doi.org/10.1145/2043612.2043619},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Thu Dec 15 08:53:32 MST 2011},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Steinmetz:2012:ENC,
  author          = {Ralf Steinmetz},
  title           = {Editorial note and call for nominations: {Nicolas D.
                     Georganas} best paper award},
  journal         = j-TOMCCAP,
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  volume          = {8},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2012},
  DOI             = {https://doi.org/10.1145/2071396.2071397},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  CODEN           = {????},
  bibdate         = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ghinea:2012:SSS,
  author =       "Gheorghita Ghinea and Oluwakemi Ademoye",
  title =        "The sweet smell of success: Enhancing multimedia
                 applications with olfaction",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2071396.2071398",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Mar 16 15:56:02 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Olfaction, or smell, is one of the last challenges
                 which multimedia applications have to conquer. As far
                 as computerized smell is concerned, there are several
                 difficulties to overcome, particularly those associated
                 with the ambient nature of smell. In this article, we
                 present results from an empirical study exploring
                 users' perception of olfaction-enhanced multimedia
                 displays. Findings show that olfaction significantly
                 adds to the user multimedia experience. Moreover, use
                 of olfaction leads to an increased sense of reality and
                 relevance. Our results also show that users are
                 tolerant of the interference and distortion effects
                 caused by olfactory effect in multimedia.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Hefeeda:2012:DET,
  author = {Mohamed Hefeeda and Cheng-Hsin Hsu},
  title = {Design and evaluation of a testbed for mobile {TV}
    networks},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071399},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {This article presents the design of a complete,
    open-source, testbed for broadcast networks that offer
    mobile TV services. Although basic architectures and
    protocols have been developed for such networks,
    detailed performance tuning and analysis are still
    needed, especially when these networks scale to serve
    many diverse TV channels to numerous subscribers. The
    detailed performance analysis could also motivate
    designing new protocols and algorithms for enhancing
    future mobile TV networks. Currently, many researchers
    evaluate the performance of mobile TV networks using
    simulation and/or theoretical modeling methods. These
    methods, while useful for early assessment, typically
    abstract away many necessary details of actual, fairly
    complex, networks. Therefore, an open-source platform
    for evaluating new ideas in a real mobile TV network is
    needed. This platform is currently not possible with
    commercial products, because they are sold as black
    boxes without the source code. In this article, we
    summarize our experiences in designing and implementing
    a testbed for mobile TV networks. We integrate
    off-the-shelf hardware components with carefully
    designed software modules to realize a scalable testbed
    that covers almost all aspects of real networks. We use
    our testbed to empirically analyze various performance
    aspects of mobile TV networks and validate/refute
    several claims made in the literature as well as
    discover/quantify multiple important performance
    tradeoffs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {3},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Lin:2012:DMS,
  author = {Yu-Ru Lin and Hari Sundaram and Munmun {De Choudhury}
    and Aisling Kelliher},
  title = {Discovering multirelational structure in social media
    streams},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071400},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {In this article, we present a novel algorithm to
    discover multirelational structures from social media
    streams. A media item such as a photograph exists as
    part of a meaningful interrelationship among several
    attributes, including time, visual content, users, and
    actions. Discovery of such relational structures
    enables us to understand the semantics of human
    activity and has applications in content organization,
    recommendation algorithms, and exploratory social
    network analysis. We are proposing a novel nonnegative
    matrix factorization framework to characterize
    relational structures of group photo streams. The
    factorization incorporates image content features and
    contextual information. The idea is to consider a
    cluster as having similar relational patterns; each
    cluster consists of photos relating to similar content
    or context. Relations represent different aspects of
    the photo stream data, including visual content,
    associated tags, photo owners, and post times. The
    extracted structures minimize the mutual information of
    the predicted joint distribution. We also introduce a
    relational modularity function to determine the
    structure cost penalty, and hence determine the number
    of clusters. Extensive experiments on a large Flickr
    dataset suggest that our approach is able to extract
    meaningful relational patterns from group photo
    streams. We evaluate the utility of the discovered
    structures through a tag prediction task and through a
    user study. Our results show that our method based on
    relational structures, outperforms baseline methods,
    including feature and tag frequency based techniques,
    by 35\%--420\%. We have conducted a qualitative user
    study to evaluate the benefits of our framework in
    exploring group photo streams. The study indicates that
    users found the extracted clustering results clearly
    represent major themes in a group; the clustering
    results not only reflect how users describe the group
    data but often lead the users to discover the evolution
    of the group activity.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {4},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Cheng:2012:EIC,
  author = {Xu Cheng and Jiangchuan Liu},
  title = {Exploring interest correlation for peer-to-peer
    socialized video sharing},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071401},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The last five years have witnessed an explosion of
    networked video sharing, represented by YouTube, as a
    new killer Internet application. Their sustainable
    development however is severely hindered by the
    intrinsic limit of their client/server architecture. A
    shift to the peer-to-peer paradigm has been widely
    suggested with success already shown in live video
    streaming and movie-on-demand. Unfortunately, our
    latest measurement demonstrates that short video clips
    exhibit drastically different statistics, which would
    simply render these existing solutions suboptimal, if
    not entirely inapplicable. Our long-term measurement
    over five million YouTube videos, on the other hand,
    reveals interesting social networks with strong
    correlation among the videos, thus opening new
    opportunities to explore. In this article, we present
    NetTube, a novel peer-to-peer assisted delivering
    framework that explores the user interest correlation
    for short video sharing. We address a series of key
    design issues to realize the system, including a
    bi-layer overlay, an efficient indexing scheme, a
    delay-aware scheduling mechanism, and a prefetching
    strategy leveraging interest correlation. We evaluate
    NetTube through both simulations and prototype
    experiments, which show that it greatly reduces the
    server workload, improves the playback quality and
    scales well.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {5},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Mei:2012:ITC,
  author = {Tao Mei and Lusong Li and Xian-Sheng Hua and Shipeng
    Li},
  title = {{ImageSense}: Towards contextual image advertising},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {6:1--6:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071402},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The daunting volumes of community-contributed media
    contents on the Internet have become one of the primary
    sources for online advertising. However, conventional
    advertising treats image and video advertising as
    general text advertising by displaying relevant ads
    based on the contents of the Web page, without
    considering the inherent characteristics of visual
    contents. This article presents a contextual
    advertising system driven by images, which
    automatically associates relevant ads with an image
    rather than the entire text in a Web page and
    seamlessly inserts the ads in the nonintrusive areas
    within each individual image. The proposed system,
    called ImageSense, supports scalable advertising of,
    from root to node, Web sites, pages, and images. In
    ImageSense, the ads are selected based on not only
    textual relevance but also visual similarity, so that
    the ads yield contextual relevance to both the text in
    the Web page and the image content. The ad insertion
    positions are detected based on image salience, as well
    as face and text detection, to minimize intrusiveness
    to the user. We evaluate ImageSense on a large-scale
    real-world images and Web pages, and demonstrate the
    effectiveness of ImageSense for online image
    advertising.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {6},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Snidaro:2012:FMV,
  author = {Lauro Snidaro and Ingrid Visentini and Gian Luca
    Foresti},
  title = {Fusing multiple video sensors for surveillance},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {7:1--7:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071403},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Real-time detection, tracking, recognition, and
    activity understanding of moving objects from multiple
    sensors represent fundamental issues to be solved in
    order to develop surveillance systems that are able to
    autonomously monitor wide and complex environments. The
    algorithms that are needed span therefore from image
    processing to event detection and behaviour
    understanding, and each of them requires dedicated
    study and research. In this context, sensor fusion
    plays a pivotal role in managing the information and
    improving system performance. Here we present a novel
    fusion framework for combining the data coming from
    multiple and possibly heterogeneous sensors observing a
    surveillance area.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {7},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Huang:2012:TAM,
  author = {Jiun-Long Huang and Shih-Chuan Chiu and Man-Kwan
    Shan},
  title = {Towards an automatic music arrangement framework using
    score reduction},
  journal = j-TOMCCAP,
  volume = {8},
  number = {1},
  pages = {8:1--8:??},
  month = jan,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2071396.2071404},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:02 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Score reduction is a process that arranges music for a
    target instrument by reducing original music. In this
    study we present a music arrangement framework that
    uses score reduction to automatically arrange music for
    a target instrument. The original music is first
    analyzed to determine the type of arrangement element
    of each section, then the phrases are identified and
    each is assigned a utility according to its type of
    arrangement element. For a set of utility-assigned
    phrases, we transform the music arrangement into an
    optimization problem and propose a phrase selection
    algorithm. The music is arranged by selecting
    appropriate phrases satisfying the playability
    constraints of a target instrument. Using the proposed
    framework, we implement a music arrangement system for
    the piano. An approach similar to Turing test is used
    to evaluate the quality of the music arranged by our
    system. The experiment results show that our system is
    able to create viable music for the piano.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {8},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Steinmetz:2012:EN,
  author = {Ralf Steinmetz},
  title = {Editorial note},
  journal = j-TOMCCAP,
  volume = {8s},
  number = {1},
  pages = {9:1--9:??},
  month = feb,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2089085.2089086},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {9},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Liu:2012:BET,
  author = {Dongyu Liu and Fei Li and Bo Shen and Songqing Chen},
  title = {Building an efficient transcoding overlay for {P2P}
    streaming to heterogeneous devices},
  journal = j-TOMCCAP,
  volume = {8s},
  number = {1},
  pages = {10:1--10:??},
  month = feb,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2089085.2089087},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {With the increasing deployment of Internet P2P/overlay
    streaming systems, more and more clients use mobile
    devices, such as smart phones and PDAs, to access these
    Internet streaming services. Compared to wired
    desktops, mobile devices normally have a smaller screen
    size, a less color depth, and lower bandwidth and thus
    cannot correctly and effectively render and display the
    data streamed to desktops. To address this problem, in
    this paper, we propose PAT (Peer-Assisted Transcoding)
    to enable effective online transcoding in P2P/overlay
    streaming. PAT has the following unique features.
    First, it leverages active peer cooperation without
    demanding infrastructure support such as transcoding
    servers. Second, as online transcoding is
    computationally intensive while the various devices
    used by participating clients may have limited
    computing power and related resources (e.g., battery,
    bandwidth), an additional overlay, called metadata
    overlay, is constructed to instantly share the
    intermediate transcoding result of a transcoding
    procedure with other transcoding nodes to minimize the
    total computing overhead in the system. The
    experimental results collected within a realistically
    simulated testbed show that by consuming 6\% extra
    bandwidth, PAT could save up to 58\% CPU cycles for
    online transcoding.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {10},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Shen:2012:IFP,
  author = {Zhijie Shen and Roger Zimmermann},
  title = {{ISP}-friendly {P2P} live streaming: a roadmap to
    realization},
  journal = j-TOMCCAP,
  volume = {8s},
  number = {1},
  pages = {11:1--11:??},
  month = feb,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2089085.2089088},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Peer-to-Peer (P2P) applications generate large amounts
    of Internet network traffic. The wide-reaching
    connectivity of P2P systems is creating resource
    inefficiencies for network providers. Recent studies
    have demonstrated that localizing cross-ISP (Internet
    service provider) traffic can mitigate this challenge.
    However, bandwidth sensitivity and display quality
    requirements complicate the ISP-friendly design for
    live streaming systems. To this date, although some
    prior techniques focusing on live streaming systems
    exist, the correlation between traffic localization and
    streaming quality guarantee has not been well explored.
    Additionally, the proposed solutions are often not easy
    to apply in practice. In our presented work, we
    demonstrate that the cross-ISP traffic of P2P live
    streaming systems can be significantly reduced with
    little impact on the streaming quality. First, we
    analytically investigate and quantify the tradeoff
    between traffic localization and streaming quality
    guarantee, determining the lower bound of the inter-AS
    (autonomous system) streaming rate below which
    streaming quality cannot be preserved. Based on the
    analysis, we further propose a practical ISP-friendly
    solution, termed IFPS, which requires only minor
    changes to the peer selection mechanism and can easily
    be integrated into both new and existing systems.
    Additionally, the significant opportunity for
    localizing traffic is underscored by our collected
    traces from PPLive, which also enabled us to derive
    realistic parameters to guide our simulations. The
    experimental results demonstrate that IFPS reduces
    cross-ISP traffic from 81\% up to 98\% while keeping
    streaming quality virtually unaffected.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {11},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Lou:2012:QDD,
  author = {Xiaosong Lou and Kai Hwang},
  title = {Quality of data delivery in peer-to-peer video
    streaming},
  journal = j-TOMCCAP,
  volume = {8s},
  number = {1},
  pages = {12:1--12:??},
  month = feb,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2089085.2089089},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {QoS in a P2P video streaming system is evaluated in
    three stages: content generation, data delivery and
    video playback. We use jitter-free probability as the
    main performance metric to study Quality of Data
    delivery (QoD). A new model that incorporates both
    bandwidth and data availability of P2P network is
    proposed. Our model relies on a sharing factor that
    models data availability among all peers. We simulate
    on a minimalistic network to demonstrate how to apply
    the analytical model to design a P2P video streaming
    system with a very low jitter rate. Our simulation
    experimental results reveal that the lower bound on
    jitter-free probability is indeed effective to reflect
    the QoD of the entire system. Our model captures the
    impact of many design choices, including upload
    bandwidth limit, peer selection strategies, and video
    stream chunking schemes.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {12},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Wu:2012:DNW,
  author = {Chuan Wu and Baochun Li and Shuqiao Zhao},
  title = {Diagnosing network-wide {P2P} live streaming
    inefficiencies},
  journal = j-TOMCCAP,
  volume = {8s},
  number = {1},
  pages = {13:1--13:??},
  month = feb,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2089085.2089090},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Large-scale live peer-to-peer (P2P) streaming
    applications have been successfully deployed in today's
    Internet. While they can accommodate hundreds of
    thousands of users simultaneously with hundreds of
    channels of programming, there still commonly exist
    channels and times where and when the streaming quality
    is unsatisfactory. In this paper, based on more than
    two terabytes and one year worth of live traces from
    UUSee, a large-scale commercial P2P live streaming
    system, we show an in-depth network-wide diagnosis of
    streaming inefficiencies, commonly present in typical
    mesh-based P2P live streaming systems. As the first
    highlight of our work, we identify an evolutionary
    pattern of low streaming quality in the system, and the
    distribution of streaming inefficiencies across various
    streaming channels and in different geographical
    regions. We then carry out an extensive investigation
    to explore the causes to such streaming inefficiencies
    over different times and across different
    channels/regions at specific times, by investigating
    the impact of factors such as the number of peers, peer
    upload bandwidth, inter-peer bandwidth availability,
    server bandwidth consumption, and many more. The
    original discoveries we have brought forward include
    the two-sided effects of peer population on the
    streaming quality in a streaming channel, the
    significant impact of inter-peer bandwidth bottlenecks
    at peak times, and the inefficient utilization of
    server capacities across concurrent channels. Based on
    these insights, we identify problems within the
    existing P2P live streaming design and discuss a number
    of suggestions to improve real-world streaming
    protocols operating at a large scale.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {13},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Wu:2012:ABP,
  author          = {Chuan Wu and Zongpeng Li and Xuanjia Qiu and Francis
                    C. M. Lau},
  title           = {Auction-based {P2P VoD} streaming: Incentives and
                    optimal scheduling},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {14:1--14:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089091},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Real-world large-scale Peer-to-Peer (P2P)
                    Video-on-Demand (VoD) streaming applications face more
                    design challenges as compared to P2P live streaming,
                    due to higher peer dynamics and less buffer overlap.
                    The situation is further complicated when we consider
                    the selfish nature of peers, who in general wish to
                    download more and upload less, unless otherwise
                    motivated. Taking a new perspective of distributed
                    dynamic auctions, we design efficient P2P VoD streaming
                    algorithms with simultaneous consideration of peer
                    incentives and streaming optimality. In our solution,
                    media block exchanges among peers are carried out
                    through local auctions, in which budget-constrained
                    peers bid for desired blocks from their neighbors,
                    which in turn deliver blocks to the winning bidders and
                    collect revenue. With strategic design of a
                    discriminative second price auction with seller
                    reservation, a supplying peer has full incentive to
                    maximally contribute its bandwidth to increase its
                    budget; requesting peers are also motivated to bid in
                    such a way that optimal media block scheduling is
                    achieved effectively in a fully decentralized fashion.
                    Applying techniques from convex optimization and
                    mechanism design, we prove (a) the incentive
                    compatibility at the selling and buying peers, and (b)
                    the optimality of the induced media block scheduling in
                    terms of social welfare maximization. Large-scale
                    empirical studies are conducted to investigate the
                    behavior of the proposed auction mechanisms in dynamic
                    P2P VoD systems based on real-world settings.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% NB: in the abstract, a stray space after the opening quote of
%%% `total cache' has been removed.
@Article{Zhang:2012:PHL,
  author =       "Tieying Zhang and Xueqi Cheng and Jianming Lv and
                 Zhenhua Li and Weisong Shi",
  title =        "Providing hierarchical lookup service for {P2P--VoD}
                 systems",
  journal =      j-TOMCCAP,
  volume =       "8s",
  number =       "1",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2089085.2089092",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Mar 16 15:56:04 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Supporting random jump in P2P-VoD systems requires
                 efficient lookup for the `best' suppliers, where `best'
                 means the suppliers should meet two requirements:
                 content match and network quality match. Most studies
                 use a DHT-based method to provide content lookup;
                 however, these methods are neither able to meet the
                 network quality requirements nor suitable for VoD
                 streaming due to the large overhead. In this paper, we
                 propose Mediacoop, a novel hierarchical lookup scheme
                 combining both content and quality match to provide
                 random jumps for P2P-VoD systems. It exploits the play
                 position to efficiently locate the candidate suppliers
                 with required data (content match), and performs
                 refined lookup within the candidates to meet quality
                 match. Theoretical analysis and simulation results show
                 that Mediacoop is able to achieve lower jump latency
                 and control overhead than the typical DHT-based method.
                 Moreover, we implement Mediacoop in a BitTorrent-like
                 P2P-VoD system called CoolFish and make optimizations
                 for such `total cache' applications. The
                 implementation and evaluation in CoolFish show that
                 Mediacoop is able to improve user experiences,
                 especially the jump latency, which verifies the
                 practicability of our design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Anonymous:2012:TCO,
  author          = {Anonymous},
  title           = {Table of Contents: Online Supplement Volume {8S},
                    Number 1},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {16:1--16:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169004},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Dornaika:2012:IRF,
  author          = {Fadi Dornaika and James H. Elder},
  title           = {Image registration for foveated panoramic sensing},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {17:1--17:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2168997},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article addresses the problem of registering
                    high-resolution, small field-of-view images with
                    low-resolution panoramic images provided by a panoramic
                    catadioptric video sensor. Such systems may find
                    application in surveillance and telepresence systems
                    that require a large field of view and high resolution
                    at selected locations. Although image registration has
                    been studied in more conventional applications, the
                    problem of registering panoramic and conventional video
                    has not previously been addressed, and this problem
                    presents unique challenges due to (i) the extreme
                    differences in resolution between the sensors (more
                    than a 16:1 linear resolution ratio in our
                    application), and (ii) the resolution inhomogeneity of
                    panoramic images. The main contributions of this
                    article are as follows. First, we introduce our
                    foveated panoramic sensor design. Second, we show how a
                    coarse registration can be computed from the raw images
                    using parametric template matching techniques. Third,
                    we propose two refinement methods allowing automatic
                    and near real-time registration between the two image
                    streams. The first registration method is based on
                    matching extracted interest points using a closed form
                    method. The second registration method is featureless
                    and based on minimizing the intensity discrepancy
                    allowing the direct recovery of both the geometric and
                    the photometric transforms. Fourth, a comparison
                    between the two registration methods is carried out,
                    which shows that the featureless method is superior in
                    accuracy. Registration examples using the developed
                    methods are presented.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% NB: third author's surname recapitalized from `Mcloone' to
%%% `McLoone'.
@Article{Zhang:2012:CPC,
  author =       "Xin Zhang and Tom{\'a}s Ward and S{\'e}amus McLoone",
  title =        "Comparison of predictive contract mechanisms from an
                 information theory perspective",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2",
  pages =        "18:1--18:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2168996.2168998",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:03 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Inconsistency arises across a Distributed Virtual
                 Environment due to network latency induced by state
                 changes communications. Predictive Contract Mechanisms
                 (PCMs) combat this problem through reducing the amount
                 of messages transmitted in return for perceptually
                 tolerable inconsistency. To date there are no methods
                 to quantify the efficiency of PCMs in communicating
                 this reduced state information. This article presents
                 an approach derived from concepts in information theory
                 for a deeper understanding of PCMs. Through a
                 comparison of representative PCMs, the worked analysis
                 illustrates interesting aspects of PCMs operation and
                 demonstrates how they can be interpreted as a form of
                 lossy information compression.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Olsen:2012:ITN,
  author          = {Dan R. Olsen and Derek Bunn and Trent Boulter and
                    Robert Walz},
  title           = {Interactive television news},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {19:1--19:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2168999},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {A new interactive television experience has been
                    created for watching television news. The goal is to
                    create a news experience that is similar to the way
                    people watch television in their living rooms while
                    giving viewers the power to make choices about what
                    they see. We partnered with existing news organizations
                    to create tools consistent with current news production
                    practices. The viewer experience allows selection of
                    the order of news content, skipping unwanted content
                    and exploring stories in more depth. These tools were
                    used to produce seven days of interactive commercial
                    news that were viewed in ten homes.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Armitage:2012:ROF,
  author          = {Grenville Armitage and Amiel Heyde},
  title           = {{REED}: {Optimizing} first person shooter game server
                    discovery using network coordinates},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {20:1--20:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169000},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Online First Person Shooter (FPS) games typically use
                    a client-server communication model, with thousands of
                    enthusiast-hosted game servers active at any time.
                    Traditional FPS server discovery may take minutes, as
                    clients create thousands of short-lived packet flows
                    while probing all available servers to find a selection
                    of game servers with tolerable round trip time (RTT).
                    REED reduces a client's probing time and network
                    traffic to 1\% of traditional server discovery. REED
                    game servers participate in a centralized, incremental
                    calculation of their network coordinates, and clients
                    use these coordinates to expedite the discovery of
                    servers with low RTTs.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Liu:2012:ILC,
  author          = {Xiaobai Liu and Shuicheng Yan and Tat-Seng Chua and
                    Hai Jin},
  title           = {Image label completion by pursuing contextual
                    decomposability},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {21:1--21:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169001},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article investigates how to automatically
                    complete the missing labels for the partially annotated
                    images, without image segmentation. The label
                    completion procedure is formulated as a nonnegative
                    data factorization problem, to decompose the global
                    image representations that are used for describing the
                    entire images, for instance, various image feature
                    descriptors, into their corresponding label
                    representations, that are used for describing the local
                    semantic regions within images. The solution provided
                    in this work is motivated by following observations.
                    First, label representations of the regions with the
                    same label often share certain commonness, yet may be
                    essentially different due to the large intraclass
                    variations. Thus, each label or concept should be
                    represented by using a subspace spanned by an ensemble
                    of basis, instead of a single one, to characterize the
                    intralabel diversities. Second, the subspaces for
                    different labels are different from each other. Third,
                    while two images are similar with each other, the
                    corresponding label representations should be similar.
                    We formulate this cross-image context as well as the
                    given partial label annotations in the framework of
                    nonnegative data factorization and then propose an
                    efficient multiplicative nonnegative update rules to
                    alternately optimize the subspaces and the
                    reconstruction coefficients. We also provide the
                    theoretic proof of algorithmic convergence and
                    correctness. Extensive experiments over several
                    challenging image datasets clearly demonstrate the
                    effectiveness of our proposed solution in boosting the
                    quality of image label completion and image annotation
                    accuracy. Based on the same formulation, we further
                    develop a label ranking algorithms, to refine the
                    noised image labels without any manual supervision. We
                    compare the proposed label ranking algorithm with the
                    state-of-the-arts over the popular evaluation databases
                    and achieve encouragingly improvements.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% NB: third author's surname corrected from `Ayg{\"u}un' (doubled
%%% `u') to `Ayg{\"u}n'.
@Article{Chen:2012:SGU,
  author =       "Yi Chen and Abhidnya A. Deshpande and Ramazan S.
                 Ayg{\"u}n",
  title =        "Sprite generation using sprite fusion",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2",
  pages =        "22:1--22:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2168996.2169002",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:03 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "There has been related research for sprite or mosaic
                 generation for over 15 years. In this article, we try
                 to understand the methodologies for sprite generation
                 and identify what has not actually been covered for
                 sprite generation. We first identify issues and focus
                 on the domain of videos for sprite generation. We
                 introduce a novel sprite fusion method that blends two
                 sprites. Sprite fusion method produces good results for
                 tracking videos and does not require object
                 segmentation. We present sample results of our
                 experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Weng:2012:CVR,
  author          = {Ming-Fang Weng and Yung-Yu Chuang},
  title           = {Collaborative video reindexing via matrix
                    factorization},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {23:1--23:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169003},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Concept-based video indexing generates a matrix of
                    scores predicting the possibilities of concepts
                    occurring in video shots. Based on the idea of
                    collaborative filtering, this article presents
                    unsupervised methods to refine the initial scores
                    generated by concept classifiers by taking into account
                    the concept-to-concept correlation and shot-to-shot
                    similarity embedded within the score matrix. Given a
                    noisy matrix, we refine the inaccurate scores via
                    matrix factorization. This method is further improved
                    by learning multiple local models and incorporating
                    contextual-temporal structures. Experiments on the
                    TRECVID 2006--2008 datasets demonstrate relative
                    performance gains ranging from 13\% to 52\% without
                    using any user annotations or external knowledge
                    resources.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Kankanhalli:2012:ISI,
  author          = {Mohan S. Kankanhalli},
  title           = {Introduction to special issue on multimedia security},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {31:1--31:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344437},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Weir:2012:IHV,
  author          = {Jonathan Weir and Weiqi Yan and Mohan S. Kankanhalli},
  title           = {Image hatching for visual cryptography},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {32:1--32:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344438},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Image hatching (or nonphotorealistic line-art) is a
                    technique widely used in the printing or engraving of
                    currency. Diverse styles of brush strokes have
                    previously been adopted for different areas of an image
                    to create aesthetically pleasing textures and shading.
                    Because there is no continuous tone within these types
                    of images, a multilevel scheme is proposed, which uses
                    different textures based on a threshold level. These
                    textures are then applied to the different levels and
                    are then combined to build up the final hatched image.
                    The proposed technique allows a secret to be hidden
                    using Visual Cryptography (VC) within the hatched
                    images. Visual cryptography provides a very powerful
                    means by which one secret can be distributed into two
                    or more pieces known as shares. When the shares are
                    superimposed exactly together, the original secret can
                    be recovered without computation. Also provided is a
                    comparison between the original grayscale images and
                    the resulting hatched images that are generated by the
                    proposed algorithm. This reinforces that the overall
                    quality of the hatched scheme is sufficient. The
                    Structural SIMilarity index (SSIM) is used to perform
                    this comparison.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Li:2012:RIB,
  author          = {Jian Li and Hongmei Liu and Jiwu Huang and Yun Q.
                    Shi},
  title           = {Reference index-based {H.264} video watermarking
                    scheme},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {33:1--33:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344439},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Video watermarking has received much attention over
                    the past years as a promising solution to copy
                    protection. Watermark robustness is still a key issue
                    of research, especially when a watermark is embedded in
                    the compressed video domain. In this article, a robust
                    watermarking scheme for H.264 video is proposed. During
                    video encoding, the watermark is embedded in the index
                    of the reference frame, referred to as reference index,
                    a bitstream syntax element newly proposed in the H.264
                    standard. Furthermore, the video content (current coded
                    blocks) is modified based on an optimization model,
                    aiming at improving watermark robustness without
                    unacceptably degrading the video's visual quality or
                    increasing the video's bit rate. Compared with the
                    existing schemes, our method has the following three
                    advantages: (1) The bit rate of the watermarked video
                    is adjustable; (2) the robustness against common video
                    operations can be achieved; (3) the watermark embedding
                    and extraction are simple. Extensive experiments have
                    verified the good performance of the proposed
                    watermarking scheme.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {33},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Gao:2012:RHC,
  author          = {Xifeng Gao and Caiming Zhang and Yan Huang and Zhigang
                    Deng},
  title           = {A robust high-capacity affine-transformation-invariant
                    scheme for watermarking {$3$D} geometric models},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {34:1--34:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344440},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article we propose a novel, robust, and
                    high-capacity watermarking method for 3D meshes with
                    arbitrary connectivities in the spatial domain based on
                    affine invariants. Given a 3D mesh model, a watermark
                    is embedded as affine-invariant length ratios of one
                    diagonal segment to the residing diagonal intersected
                    by the other one in a coplanar convex quadrilateral. In
                    the extraction process, a watermark is recovered by
                    combining all the watermark pieces embedded in length
                    ratios through majority voting. Extensive experimental
                    results demonstrate the robustness, high computational
                    efficiency, high capacity, and
                    affine-transformation-invariant characteristics of the
                    proposed approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Yang:2012:EMA,
  author          = {Rui Yang and Zhenhua Qu and Jiwu Huang},
  title           = {Exposing {MP3} audio forgeries using frame offsets},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {35:1--35:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344441},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Audio recordings should be authenticated before they
                    are used as evidence. Although audio watermarking and
                    signature are widely applied for authentication, these
                    two techniques require accessing the original audio
                    before it is published. Passive authentication is
                    necessary for digital audio, especially for the most
                    popular audio format: MP3. In this article, we propose
                    a passive approach to detect forgeries of MP3 audio.
                    During the process of MP3 encoding the audio samples
                    are divided into frames, and thus each frame has its
                    own frame offset after encoding. Forgeries lead to the
                    breaking of framing grids. So the frame offset is a
                    good indication for locating forgeries, and it can be
                    retrieved by the identification of the quantization
                    characteristic. In this way, the doctored positions can
                    be automatically located. Experimental results
                    demonstrate that the proposed approach is effective in
                    detecting some common forgeries, such as deletion,
                    insertion, substitution, and splicing. Even when the
                    bit rate is as low as 32 kbps, the detection rate is
                    above 99\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {35},
  fjournal        = {ACM Transactions on Multimedia Computing,
                    Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Feng:2012:CAO,
  author =       "Hui Feng and Hefei Ling and Fuhao Zou and Weiqi Yan
                 and Zhengding Lu",
  title =        "A collusion attack optimization strategy for digital
                 fingerprinting",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "36:1--36:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344442",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Collusion attack is a cost-efficient attack for
                 digital fingerprinting. In this article, we propose a
                 novel collusion attack strategy, Iterative Optimization
                 Collusion Attack (IOCA), which is based upon the
                 gradient attack and the principle of informed watermark
                 embedding. We evaluate the performance of the proposed
                 collusion attack strategy in defeating four typical
                 fingerprinting schemes under a well-constructed
                 evaluation framework. The simulation results show that
                 the proposed strategy performs more effectively than
                 the gradient attack, and adopting no more than three
                 fingerprinted copies can sufficiently collapse examined
                 fingerprinting schemes. Meanwhile, the content resulted
                 from the proposed attack still preserves high
                 perceptual quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Sachan:2012:ALV,
  author =       "Amit Sachan and Sabu Emmanuel and Mohan S.
                 Kankanhalli",
  title =        "Aggregate licenses validation for digital rights
                 violation detection",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "37:1--37:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344443",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Digital Rights Management (DRM) is the term associated
                 with the set of technologies to prevent illegal
                 multimedia content distribution and consumption. DRM
                 systems generally involve multiple parties such as
                 owner, distributors, and consumers. The owner issues
                 redistribution licenses to its distributors. The
                 distributors in turn using their received
                 redistribution licenses can generate and issue new
                 redistribution licenses to other distributors and new
                 usage licenses to consumers. As a part of rights
                 violation detection, these newly generated licenses
                 must be validated by a validation authority against the
                 redistribution license used to generate them. The
                 validation of these newly generated licenses becomes
                 quite complex when there exist multiple redistribution
                 licenses for a media with the distributors. In such
                 cases, the validation process requires validation using
                 an exponential number (to the number of redistribution
                 licenses) of validation inequalities and each
                 validation inequality may contain up to an exponential
                 number of summation terms. This makes the validation
                 process computationally intensive and necessitates to
                 do the validation efficiently. To overcome this, we
                 propose validation tree, a prefix-tree-based validation
                 method to do the validation efficiently. Theoretical
                 analysis and experimental results show that our
                 proposed technique reduces the validation time
                 significantly.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Riiser:2012:VSU,
  author =       "Haakon Riiser and Tore Endestad and Paul Vigmostad and
                 Carsten Griwodz and P{\aa}l Halvorsen",
  title =        "Video streaming using a location-based
                 bandwidth-lookup service for bitrate planning",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240137",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A lot of people around the world commute using public
                 transportation and would like to spend this time
                 viewing streamed video content such as news or sports
                 updates. However, mobile wireless networks typically
                 suffer from severe bandwidth fluctuations, and the
                 networks are often completely unresponsive for several
                 seconds, sometimes minutes. Today, there are several
                 ways of adapting the video bitrate and thus the video
                 quality to such fluctuations, for example, using
                 scalable video codecs or segmented adaptive HTTP
                 streaming that switches between nonscalable video
                 streams encoded in different bitrates. Still, for a
                 better long-term video playout experience that avoids
                 disruptions and frequent quality changes while using
                 existing video adaptation technology, it is desirable
                 to perform bandwidth prediction and planned quality
                 adaptation. This article describes a video streaming
                 system for receivers equipped with a GPS. A receiver's
                 download rate is constantly monitored, and periodically
                 reported back to a central database along with
                 associated GPS positional data. Thus, based on the
                 current location, a streaming device can use a
                 GPS-based bandwidth-lookup service in order to better
                 predict the near-future bandwidth availability and
                 create a schedule for the video playout that takes
                 likely future availability into account. To create a
                 prototype and perform initial tests, we conducted
                 several field trials while commuting using public
                 transportation. We show how our database has been used
                 to predict bandwidth fluctuations and network outages,
                 and how this information helps maintain uninterrupted
                 playback with less compromise on video quality than
                 possible without prediction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Valdes:2012:AEV,
  author =       "Victor Valdes and Jose M. Martinez",
  title =        "Automatic evaluation of video summaries",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240138",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article describes a method for the automatic
                 evaluation of video summaries based on the training of
                 individual predictors for different quality measures
                 from the TRECVid 2008 BBC Rushes Summarization Task.
                 The obtained results demonstrate that, with a large set
                 of evaluation data, it is possible to train fully
                 automatic evaluation systems based on visual features
                 automatically extracted from the summaries. The
                 proposed approach will enable faster and easier
                 estimation of the results of newly developed
                 abstraction algorithms and the study of which summary
                 characteristics influence their perceived quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Tian:2012:STL,
  author =       "Xinmei Tian and Dacheng Tao and Yong Rui",
  title =        "Sparse transfer learning for interactive video search
                 reranking",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240139",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Visual reranking is effective to improve the
                 performance of the text-based video search. However,
                 existing reranking algorithms can only achieve limited
                 improvement because of the well-known semantic gap
                 between low-level visual features and high-level
                 semantic concepts. In this article, we adopt
                 interactive video search reranking to bridge the
                 semantic gap by introducing user's labeling effort. We
                 propose a novel dimension reduction tool, termed sparse
                 transfer learning (STL), to effectively and efficiently
                 encode user's labeling information. STL is particularly
                 designed for interactive video search reranking.
                 Technically, it (a) considers the pair-wise
                 discriminative information to maximally separate
                 labeled query relevant samples from labeled query
                 irrelevant ones, (b) achieves a sparse representation
                 for the subspace to encode user's intention by
                 applying the elastic net penalty, and (c) propagates
                 user's labeling information from labeled samples to
                 unlabeled samples by using the data distribution
                 knowledge. We conducted extensive experiments on the
                 TRECVID 2005, 2006 and 2007 benchmark datasets and
                 compared STL with popular dimension reduction
                 algorithms. We report superior performance by using the
                 proposed STL-based interactive video search
                 reranking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zhang:2012:IBD,
  author =       "Xin Zhang and Tom{\'a}s E. Ward and S{\'e}amus
                 McLoone",
  title =        "An information-based dynamic extrapolation model for
                 networked virtual environments",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240140",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Various Information Management techniques have been
                 developed to help maintain a consistent shared virtual
                 world in a Networked Virtual Environment. However, such
                 techniques have to be carefully adapted to the
                 application state dynamics and the underlying network.
                 This work presents a novel framework that minimizes
                 inconsistency by optimizing bandwidth usage to deliver
                 useful information. This framework measures the state
                 evolution using an information model and dynamically
                 switches extrapolation models and the packet rate to
                 make the most information-efficient usage of the
                 available bandwidth. The results shown demonstrate that
                 this approach can help optimize consistency under
                 constrained and time-varying network conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Yang:2012:UCM,
  author =       "Linjun Yang and Bo Geng and Alan Hanjalic and
                 Xian-Sheng Hua",
  title =        "A unified context model for web image retrieval",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240141",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Content-based web image retrieval based on the
                 query-by-example (QBE) principle remains a challenging
                 problem due to the semantic gap as well as the gap
                 between a user's intent and the representativeness of a
                 typical image query. In this article, we propose to
                 address this problem by integrating query-related
                 contextual information into an advanced query model to
                 improve the performance of QBE-based web image
                 retrieval. We consider both the local and global
                 context of the query image. The local context can be
                 inferred from the web pages and the click-through log
                 associated with the query image, while the global
                 context is derived from the entire corpus comprising
                 all web images and the associated web pages. To
                 effectively incorporate the local query context we
                 propose a language modeling based approach to deal with
                 the combined structured query representation from the
                 contextual and visual information. The global query
                 context is integrated by the multi-modal relevance
                 model to ``reconstruct'' the query from the document
                 models indexed in the corpus. In this way, the global
                 query context is employed to address the noise or
                 missing information in the query and its local context,
                 so that a comprehensive and robust query model can be
                 obtained. We evaluated the proposed approach on a
                 representative product image dataset collected from the
                 web and demonstrated that the inclusion of the local
                 and global query contexts significantly improves the
                 performance of QBE-based web image retrieval.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Patras:2012:CTS,
  author =       "Paul Patras and Albert Banchs and Pablo Serrano",
  title =        "A control theoretic scheme for efficient video
                 transmission over {IEEE 802.11e EDCA WLANs}",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240142",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The EDCA mechanism of the IEEE 802.11 standard has
                 been designed to support, among others, video traffic.
                 This mechanism relies on a number of parameters whose
                 configuration is left open by the standard. Although
                 there are some recommended values for these parameters,
                 they are fixed independent of the WLAN conditions,
                 which results in suboptimal performance. Following this
                 observation, a number of approaches in the literature
                 have been devised to set the EDCA parameters based on
                 an estimation of the WLAN conditions. However, these
                 previous approaches are based on heuristics and hence
                 do not guarantee optimized performance. In this article
                 we propose a novel algorithm to adjust the EDCA
                 parameters to carry video traffic which, in contrast to
                 previous approaches, is sustained on mathematical
                 foundations that guarantee optimal performance. In
                 particular, our approach builds upon (i) an analytical
                 model of the WLAN performance under video traffic, used
                 to derive the optimal point of operation of EDCA, and
                 (ii) a control theoretic designed mechanism which
                 drives the WLAN to this point of operation. Via
                 extensive simulations, we show that the proposed
                 approach performs optimally and substantially
                 outperforms the standard recommended configuration as
                 well as previous adaptive proposals.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zhu:2012:JLS,
  author =       "Xinglei Zhu and Chang W. Chen",
  title =        "A joint layered scheme for reliable and secure mobile
                 {JPEG-2000} streaming",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240143",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel joint layered approach
                 to simultaneously achieve both reliable and secure
                 mobile JPEG-2000 image streaming. With a priori
                 knowledge of JPEG-2000 source coding and channel
                 coding, the proposed joint system integrates
                 authentication into the media error protection
                 components to ensure that every source-decodable media
                 unit is authenticated. By such a dedicated design, the
                 proposed scheme protects both compressed JPEG-2000
                 codestream and the authentication data from wireless
                 channel impairments. It is fundamentally different from
                 many existing systems that consider the problem of
                 media authentication separately from the other
                 operations in the media transmission system. By
                 utilizing the contextual relationship, such as coding
                 dependency and content importance between media slices
                 for authentication hash appending, the proposed scheme
                 generates an extremely low authentication overhead.
                 Under this joint layered coding framework, an optimal
                 rate allocation algorithm for source coding, channel
                 coding, and media authentication is developed to
                 guarantee end-to-end media quality. Experiment results
                 on JPEG-2000 images validate the proposed scheme and
                 demonstrate that the performance of the proposed scheme
                 is approaching its upper bound, in which case no
                 authentication is applied to the media stream.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Gatica-Perez:2012:ISS,
  author =       "Daniel Gatica-Perez and Gang Hua and Wei Tsang Ooi and
                 P{\aa}l Halvorsen",
  title =        "Introduction to the special section of best papers of
                 {ACM Multimedia 2011}",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "38:1--38:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348817",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Wu:2012:CPA,
  author =       "Wanmin Wu and Ahsan Arefin and Gregorij Kurillo and
                 Pooja Agarwal and Klara Nahrstedt and Ruzena Bajcsy",
  title =        "{CZLoD}: a psychophysical approach for {$3$D}
                 tele-immersive video",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "39:1--39:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348818",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a psychophysical study that
                 measures the perceptual thresholds of a new factor
                 called Color-plus-Depth Level-of-Details (CZLoD)
                 peculiar to polygon-based 3D tele-immersive video. The
                 results demonstrate the existence of Just Noticeable
                 Degradation and Just Unacceptable Degradation
                 thresholds on the factor. In light of the results, we
                 design and implement a real-time perception-based
                 quality adaptor for 3D tele-immersive video. Our
                 experimental results show that the adaptation scheme
                 can reduce resource usage (e.g., CPU cycles) while
                 considerably enhancing the overall perceived visual
                 quality. Our analysis confirms the potential temporal
                 and spatial performance benefits achievable with CZLoD
                 adaptation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ji:2012:AQS,
  author =       "Rongrong Ji and Felix X. Yu and Tongtao Zhang and
                 Shih-Fu Chang",
  title =        "Active query sensing: {Suggesting} the best query view
                 for mobile visual search",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "40:1--40:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348819",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "While much exciting progress is being made in mobile
                 visual search, one important question has been left
                 unexplored in all current systems. When searching
                 objects or scenes in the 3D world, which viewing angle
                 is more likely to be successful? More particularly, if
                 the first query fails to find the right target, how
                 should the user control the mobile camera to form the
                 second query? In this article, we propose a novel
                 Active Query Sensing system for mobile location search,
                 which actively suggests the best subsequent query view
                 to recognize the physical location in the mobile
                 environment. The proposed system includes two unique
                 components: (1) an offline process for analyzing the
                 saliencies of different views associated with each
                 geographical location, which predicts the location
                 search precisions of individual views by modeling their
                 self-retrieval score distributions. (2) an online
                 process for estimating the view of an unseen query, and
                 suggesting the best subsequent view change.
                 Specifically, the optimal viewing angle change for the
                 next query can be formulated as an online information
                 theoretic approach. Using a scalable visual search
                 system implemented over a NYC street view dataset (0.3
                 million images), we show a performance gain by reducing
                 the failure rate of mobile location search to only 12\%
                 after the second query. We have also implemented an
                 end-to-end functional system, including user interfaces
                 on iPhones, client-server communication, and a remote
                 search server. This work may open up an exciting new
                 direction for developing interactive mobile media
                 applications through the innovative exploitation of
                 active sensing and query formulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Shirmohammadi:2012:ISS,
  author =       "Shervin Shirmohammadi and Mohamed Hefeeda and Wei
                 Tsang Ooi and Romulus Grigoras",
  title =        "Introduction to special section on {$3$D} mobile
                 multimedia",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "41:1--41:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348820",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Liu:2012:QOV,
  author          = {Yanwei Liu and Song Ci and Hui Tang and Yun Ye and
                     Jinxia Liu},
  title           = {{QoE}-oriented {$3$D} video transcoding for mobile
                     streaming},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {3s},
  pages           = {42:1--42:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2348816.2348821},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:07 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {With advance in mobile 3D display, mobile 3D video is
                     already enabled by the wireless multimedia networking,
                     and it will be gradually popular since it can make
                     people enjoy the natural 3D experience anywhere and
                     anytime. In current stage, mobile 3D video is generally
                     delivered over the heterogeneous network combined by
                     wired and wireless channels. How to guarantee the
                     optimal 3D visual quality of experience (QoE) for the
                     mobile 3D video streaming is one of the important
                     topics concerned by the service provider. In this
                     article, we propose a QoE-oriented transcoding approach
                     to enhance the quality of mobile 3D video service. By
                     learning the pre-controlled QoE patterns of 3D
                     contents, the proposed 3D visual QoE inferring model
                     can be utilized to regulate the transcoding
                     configurations in real-time according to the feedbacks
                     of network and user-end device information. In the
                     learning stage, we propose a piecewise linear mean
                     opinion score (MOS) interpolation method to further
                     reduce the cumbersome manual work of preparing QoE
                     patterns. Experimental results show that the proposed
                     transcoding approach can provide the adapted 3D stream
                     to the heterogeneous network, and further provide
                     superior QoE performance to the fixed quantization
                     parameter (QP) transcoding and mean squared error (MSE)
                     optimized transcoding for mobile 3D video streaming.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {42},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Liu:2012:NVT,
  author          = {Shujie Liu and Chang Wen Chen},
  title           = {A novel {$3$D} video transcoding scheme for adaptive
                     {$3$D} video transmission to heterogeneous terminals},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {3s},
  pages           = {43:1--43:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2348816.2348822},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:07 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Three-dimensional video (3DV) is attracting many
                     interests with its enhanced viewing experience and more
                     user driven features. 3DV has several unique
                     characteristics different from 2D video: (1) It has a
                     much larger amount of data captured and compressed, and
                     corresponding video compression techniques can be much
                     more complicated in order to explore data redundancy.
                     This will lead to more constraints on users' network
                     access and computational capability, (2) Most users
                     only need part of the 3DV data at any given time, while
                     the users' requirements exhibit large diversity, (3)
                     Only a limited number of views are captured and
                     transmitted for 3DV. View rendering is thus necessary
                     to generate virtual views based on the received 3DV
                     data. However, many terminal devices do not have the
                     functionality to generate virtual views. To enable 3DV
                     experience for the majority of users with limited
                     capabilities, adaptive 3DV transmission is necessary to
                     extract/generate the required data content and
                     represent it with supported formats and bitrates for
                     heterogeneous terminal devices. 3DV transcoding is an
                     emerging and effective technique to achieve desired
                     adaptive 3DV transmission. In this article, we propose
                     the first efficient 3DV transcoding scheme that can
                     obtain any desired view, either an encoded one or a
                     virtual one, and compress it with more universal
                     H.264/AVC. The key idea of the proposed scheme is to
                     appropriately utilize motion information contained in
                     the bitstream to generate candidate motion information.
                     Original information of both the desired view and
                     reference views are used to obtain this candidate
                     information and a proper motion refinement process is
                     carried out for certain blocks. Simulation results show
                     that, compared to the straightforward cascade
                     algorithm, the proposed scheme is able to output
                     compressed bitstream of the required view with
                     significantly reduced complexity while incurring
                     negligible performance loss. Such a 3DV transcoding can
                     be applied to most gateways that usually have
                     constraints on computational complexity and time
                     delay.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {43},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Roodaki:2012:NMD,
  author          = {Hoda Roodaki and Mahmoud Reza Hashemi and Shervin
                     Shirmohammadi},
  title           = {A new methodology to derive objective quality
                     assessment metrics for scalable multiview {$3$D} video
                     coding},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {3s},
  pages           = {44:1--44:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2348816.2348823},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:07 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {With the growing demand for 3D video, efforts are
                     underway to incorporate it in the next generation of
                     broadcast and streaming applications and standards. 3D
                     video is currently available in games, entertainment,
                     education, security, and surveillance applications. A
                     typical scenario for multiview 3D consists of several
                     3D video sequences captured simultaneously from the
                     same scene with the help of multiple cameras from
                     different positions and through different angles.
                     Multiview video coding provides a compact
                     representation of these multiple views by exploiting
                     the large amount of inter-view statistical
                     dependencies. One of the major challenges in this field
                     is how to transmit the large amount of data of a
                     multiview sequence over error prone channels to
                     heterogeneous mobile devices with different bandwidth,
                     resolution, and processing/battery power, while
                     maintaining a high visual quality. Scalable Multiview
                     3D Video Coding (SMVC) is one of the methods to address
                     this challenge; however, the evaluation of the overall
                     visual quality of the resulting scaled-down video
                     requires a new objective perceptual quality measure
                     specifically designed for scalable multiview 3D video.
                     Although several subjective and objective quality
                     assessment methods have been proposed for multiview 3D
                     sequences, no comparable attempt has been made for
                     quality assessment of scalable multiview 3D video. In
                     this article, we propose a new methodology to build
                     suitable objective quality assessment metrics for
                     different scalable modalities in multiview 3D video.
                     Our proposed methodology considers the importance of
                     each layer and its content as a quality of experience
                     factor in the overall quality. Furthermore, in addition
                     to the quality of each layer, the concept of disparity
                     between layers (inter-layer disparity) and disparity
                     between the units of each layer (intra-layer disparity)
                     is considered as an effective feature to evaluate
                     overall perceived quality more accurately. Simulation
                     results indicate that by using this methodology, more
                     efficient objective quality assessment metrics can be
                     introduced for each multiview 3D video scalable
                     modalities.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {44},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Hamza:2012:EEM,
  author          = {Ahmed Hamza and Mohamed Hefeeda},
  title           = {Energy-efficient multicasting of multiview {$3$D}
                     videos to mobile devices},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {3s},
  pages           = {45:1--45:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2348816.2348824},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:07 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Multicasting multiple video streams over wireless
                     broadband access networks enables the delivery of
                     multimedia content to large-scale user communities in a
                     cost-efficient manner. Three dimensional (3D) videos
                     are the next natural step in the evolution of digital
                     media technologies. In order to provide 3D perception,
                     3D video streams contain one or more views that greatly
                     increase their bandwidth requirements. Due to the
                     limited channel capacity and variable bit rate of the
                     videos, multicasting multiple 3D videos over wireless
                     broadband networks is a challenging problem. In this
                     article, we consider a 4G wireless access network in
                     which a number of 3D videos represented in two-view
                     plus depth format and encoded using scalable video
                     coders are multicast. We formulate the optimal 3D video
                     multicasting problem to maximize the quality of
                     rendered virtual views on the receivers' displays. We
                     show that this problem is NP-complete and present a
                     polynomial time approximation algorithm to solve it. We
                     then extend the proposed algorithm to efficiently
                     schedule the transmission of the chosen substreams from
                     each video in order to maximize the power saving on the
                     mobile receivers. Our simulation-based experimental
                     results show that our algorithm provides solutions that
                     are within 0.3 dB of the optimal solutions while
                     satisfying real-time requirements of multicast systems.
                     In addition, our algorithm results in an average power
                     consumption reduction of 86\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {45},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Shi:2012:RTR,
  author          = {Shu Shi and Klara Nahrstedt and Roy Campbell},
  title           = {A real-time remote rendering system for interactive
                     mobile graphics},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {3s},
  pages           = {46:1--46:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2348816.2348825},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:07 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Mobile devices are gradually changing people's
                     computing behaviors. However, due to the limitations of
                     physical size and power consumption, they are not
                     capable of delivering a 3D graphics rendering
                     experience comparable to desktops. Many applications
                     with intensive graphics rendering workloads are unable
                     to run on mobile platforms directly. This issue can be
                     addressed with the idea of remote rendering: the heavy
                     3D graphics rendering computation runs on a powerful
                     server and the rendering results are transmitted to the
                     mobile client for display. However, the simple remote
                     rendering solution inevitably suffers from the large
                     interaction latency caused by wireless networks, and is
                     not acceptable for many applications that have very
                     strict latency requirements. In this article, we
                     present an advanced low-latency remote rendering system
                     that assists mobile devices to render interactive 3D
                     graphics in real-time. Our design takes advantage of an
                     image based rendering technique: 3D image warping, to
                     synthesize the mobile display from the depth images
                     generated on the server. The research indicates that
                     the system can successfully reduce the interaction
                     latency while maintaining the high rendering quality by
                     generating multiple depth images at the carefully
                     selected viewpoints. We study the problem of viewpoint
                     selection, propose a real-time reference viewpoint
                     prediction algorithm, and evaluate the algorithm
                     performance with real-device experiments.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {46},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Guan:2012:EMM,
  author =       "Wei Guan and Suya You and Ulrich Neumann",
  title =        "Efficient matchings and mobile augmented reality",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "47:1--47:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348826",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With the fast-growing popularity of smart phones in
                 recent years, augmented reality (AR) on mobile devices
                 is gaining more attention and becomes more demanding
                 than ever before. However, the limited processors in
                 mobile devices are not quite promising for AR
                 applications that require real-time processing speed.
                 The challenge exists due to the fact that, while fast
                 features are usually not robust enough in matchings,
                 robust features like SIFT or SURF are not
                 computationally efficient. There is always a tradeoff
                 between robustness and efficiency and it seems that we
                 have to sacrifice one for the other. While this is true
                 for most existing features, researchers have been
                 working on designing new features with both robustness
                 and efficiency. In this article, we are not trying to
                 present a completely new feature. Instead, we propose
                 an efficient matching method for robust features. An
                 adaptive scoring scheme and a more distinctive
                 descriptor are also proposed for performance
                 improvements. Besides, we have developed an outdoor
                 augmented reality system that is based on our proposed
                 methods. The system demonstrates that not only it can
                 achieve robust matchings efficiently, it is also
                 capable to handle large occlusions such as passengers
                 and moving vehicles, which is another challenge for
                 many AR applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{TOMCCAP-STAFF:2012:TCO,
  author          = {{TOMCCAP-STAFF}},
  title           = {Table of contents: Online supplement volume 8, number
                     2s, online supplement volume 8, number 3s},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {4},
  pages           = {48:1--48:??},
  month           = nov,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2379790.2382432},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Sun May 5 09:14:21 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {48},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Steinmetz:2012:E,
  author          = {Ralf Steinmetz},
  title           = {Editorial},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {4},
  pages           = {49:1--49:??},
  month           = nov,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2379790.2379791},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Sun May 5 09:14:21 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Liu:2012:LRC,
  author =       "Xiaobai Liu and Shuicheng Yan and Bin Cheng and Jinhui
                 Tang and Tat-Seng Chua and Hai Jin",
  title =        "Label-to-region with continuity-biased bi-layer
                 sparsity priors",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "4",
  pages =        "50:1--50:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2379790.2379792",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:21 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this work, we investigate how to reassign the fully
                 annotated labels at image level to those contextually
                 derived semantic regions, namely Label-to-Region (L2R),
                 in a collective manner. Given a set of input images
                 with label annotations, the basic idea of our approach
                 to L2R is to first discover the patch correspondence
                 across images, and then propagate the common labels
                 shared in image pairs to these correlated patches.
                 Specially, our approach consists of following aspects.
                 First, each of the input images is encoded as a
                 Bag-of-Hierarchical-Patch (BOP) for capturing the rich
                 cues at variant scales, and the individual patches are
                 expressed by patch-level feature descriptors. Second,
                 we present a sparse representation formulation for
                 discovering how well an image or a semantic region can
                 be robustly reconstructed by all the other image
                 patches from the input image set. The underlying
                 philosophy of our formulation is that an image region
                 can be sparsely reconstructed with the image patches
                 belonging to the other images with common labels, while
                 the robustness in label propagation across images
                 requires that these selected patches come from very few
                 images. This preference of being sparse at both patch
                 and image level is named bi-layer sparsity prior.
                 Meanwhile, we enforce the preference of choosing
                 larger-size patches in reconstruction, referred to as
                 continuity-biased prior in this work, which may further
                 enhance the reliability of L2R assignment. Finally, we
                 harness the reconstruction coefficients to propagate
                 the image labels to the matched patches, and fuse the
                 propagation results over all patches to finalize the
                 L2R task. As a by-product, the proposed
                 continuity-biased bi-layer sparse representation
                 formulation can be naturally applied to perform image
                 annotation on new testing images. Extensive experiments
                 on three public image datasets clearly demonstrate the
                 effectiveness of our proposed framework in both L2R
                 assignment and image annotation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Rooij:2012:ETS,
  author =       "Ork de Rooij and Marcel Worring",
  title =        "Efficient targeted search using a focus and context
                 video browser",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "4",
  pages =        "51:1--51:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2379790.2379793",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:21 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Currently there are several interactive content-based
                 video retrieval techniques and systems available.
                 However, retrieval performance depends heavily on the
                 means of interaction. We argue that effective CBVR
                 requires efficient, specialized user interfaces. In
                 this article we propose guidelines for such an
                 interface, and we propose an effective CBVR engine: the
                 ForkBrowser, which builds upon the principle of focus
                 and context. This browser is evaluated using a
                 combination of user simulation and real user
                 evaluation. Results indicate that the ideas have merit,
                 and that the browser performs very well when compared
                 to the state-of-the-art in video retrieval.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ghinea:2012:UPM,
  author          = {Gheorghita Ghinea and Oluwakemi Ademoye},
  title           = {User perception of media content association in
                     olfaction-enhanced multimedia},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {4},
  pages           = {52:1--52:??},
  month           = nov,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2379790.2379794},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Sun May 5 09:14:21 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Olfaction is an exciting challenge facing multimedia
                     applications. In this article we have investigated user
                     perception of the association between olfactory media
                     content and video media content in olfactory-enhanced
                     multimedia. Results show that the association between
                     scent and content has a significant impact on the
                     user-perceived experience of olfactory-enhanced
                     multimedia.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {52},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Spicer:2012:NAD,
  author          = {Ryan Spicer and Yu-Ru Lin and Aisling Kelliher and
                     Hari Sundaram},
  title           = {{NextSlidePlease}: Authoring and delivering agile
                     multimedia presentations},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {4},
  pages           = {53:1--53:??},
  month           = nov,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2379790.2379795},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Sun May 5 09:14:21 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Presentation support tools, such as Microsoft
                     PowerPoint, pose challenges both in terms of creating
                     linear presentations from complex data and fluidly
                     navigating such linear structures when presenting to
                     diverse audiences. NextSlidePlease is a slideware
                     application that addresses these challenges using a
                     directed graph structure approach for authoring and
                     delivering multimedia presentations. The application
                     combines novel approaches for searching and analyzing
                     presentation datasets, composing meaningfully
                     structured presentations, and efficiently delivering
                     material under a variety of time constraints. We
                     introduce and evaluate a presentation analysis
                     algorithm intended to simplify the process of authoring
                     dynamic presentations, and a time management and path
                     selection algorithm that assists users in prioritizing
                     content during the presentation process. Results from
                     two comparative user studies indicate that the directed
                     graph approach promotes the creation of hyperlinks, the
                     consideration of connections between content items, and
                     a richer understanding of the time management
                     consequences of including and selecting presentation
                     material.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {53},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Qi:2012:OBI,
  author =       "Heng Qi and Keqiu Li and Yanming Shen and Wenyu Qu",
  title =        "Object-based image retrieval with kernel on adjacency
                 matrix and local combined features",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "4",
  pages =        "54:1--54:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2379790.2379796",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:21 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In object-based image retrieval, there are two
                 important issues: an effective image representation
                 method for representing image content and an effective
                 image classification method for processing user
                 feedback to find more images containing the
                 user-desired object categories. In the image
                 representation method, the local-based representation
                 is the best selection for object-based image retrieval.
                 As a kernel-based classification method, Support Vector
                 Machine (SVM) has shown impressive performance on image
                 classification. But SVM cannot work on the local-based
                 representation unless there is an appropriate kernel.
                 To address this problem, some representative kernels
                 are proposed in literatures. However, these kernels
                 cannot work effectively in object-based image retrieval
                 due to ignoring the spatial context and the combination
                 of local features. In this article, we present Adjacent
                 Matrix (AM) and the Local Combined Features (LCF) to
                 incorporate the spatial context and the combination of
                 local features into the kernel. We propose the AM-LCF
                 feature vector to represent image content and the
                 AM-LCF kernel to measure the similarities between
                 AM-LCF feature vectors. According to the detailed
                 analysis, we show that the proposed kernel can overcome
                 the deficiencies of existing kernels. Moreover, we
                 evaluate the proposed kernel through experiments of
                 object-based image retrieval on two public image sets.
                 The experimental results show that the performance of
                 object-based image retrieval can be improved by the
                 proposed kernel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Li:2012:VPA,
  author =       "Guangda Li and Meng Wang and Zheng Lu and Richang Hong
                 and Tat-Seng Chua",
  title =        "In-video product annotation with {Web} information
                 mining",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "4",
  pages =        "55:1--55:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2379790.2379797",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:21 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Product annotation in videos is of great importance
                 for video browsing, search, and advertisement. However,
                 most of the existing automatic video annotation
                 research focuses on the annotation of high-level
                 concepts, such as events, scenes, and object
                 categories. This article presents a novel solution to
                 the annotation of specific products in videos by mining
                 information from the Web. It collects a set of
                 high-quality training data for each product by
                 simultaneously leveraging Amazon and Google image
                 search engine. A visual signature for each product is
                 then built based on the bag-of-visual-words
                 representation of the training images. A correlative
                 sparsification approach is employed to remove noisy
                 bins in the visual signatures. These signatures are
                 used to annotate video frames. We conduct experiments
                 on more than 1,000 videos and the results demonstrate
                 the feasibility and effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Gopinathan:2012:ASO,
  author =       "Ajay Gopinathan and Zongpeng Li",
  title =        "Algorithms for stochastic optimization of multicast
                 content delivery with network coding",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "4",
  pages =        "56:1--56:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2379790.2379798",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:21 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The usage of network resources by content providers is
                 commonly governed by Service-Level Agreements (SLA)
                 between the content provider and the network service
                 provider. Resource usage exceeding the limits specified
                 in the SLA incurs the content provider additional
                 charges, usually at a higher cost. Hence, the content
                 provider's goal is to provision adequate resources in
                 the SLA based on forecasts of future demand. We study
                 capacity purchasing strategies when the content
                 provider employs network coded multicast as the media
                 delivery mechanism, with uncertainty in its future
                 customer set explicitly taken into consideration. The
                 latter requires the content provider to make capacity
                 provisioning decisions based on market predictions and
                 historical customer usage patterns. The probabilistic
                 element suggests a stochastic optimization approach. We
                 model this problem as a two-stage stochastic
                 optimization problem with recourse. Such optimizations
                 are \#P-hard to solve directly, and we design two
                 approximation algorithms for them. The first is a
                 heuristic algorithm that exploits properties unique to
                 network coding, so that only polynomial-time operations
                 are needed. It performs well in general scenarios, but
                 the gap from the optimal solution is not bounded by any
                 constant in the worst case. This motivates our second
                 approach, a sampling algorithm partly inspired from the
                 work of Gupta et al. [2004a]. We employ techniques from
                 duality theory in linear optimization to prove that the
                 sampling algorithm provides a 3-approximation to the
                 stochastic multicast problem. We conduct extensive
                 simulations to illustrate the efficacy of both
                 algorithms, and show that the performance of both is
                 usually within 10\% of the optimal solution in
                 practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Hendrikx:2013:PCG,
  author =       "Mark Hendrikx and Sebastiaan Meijer and Joeri {Van Der
                 Velden} and Alexandru Iosup",
  title =        "Procedural content generation for games: a survey",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422957",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Hundreds of millions of people play computer games
                 every day. For them, game content---from 3D objects to
                 abstract puzzles---plays a major entertainment role.
                 Manual labor has so far ensured that the quality and
                 quantity of game content matched the demands of the
                 playing community, but is facing new scalability
                 challenges due to the exponential growth over the last
                 decade of both the gamer population and the production
                 costs. Procedural Content Generation for Games (PCG-G)
                 may address these challenges by automating, or aiding
                 in, game content generation. PCG-G is difficult, since
                 the generator has to create the content, satisfy
                 constraints imposed by the artist, and return
                 interesting instances for gamers. Despite a large body
                 of research focusing on PCG-G, particularly over the
                 past decade, ours is the first comprehensive survey of
                 the field of PCG-G. We first introduce a comprehensive,
                 six-layered taxonomy of game content: bits, space,
                 systems, scenarios, design, and derived. Second, we
                 survey the methods used across the whole field of PCG-G
                 from a large research body. Third, we map PCG-G methods
                 to game content layers; it turns out that many of the
                 methods used to generate game content from one layer
                 can be used to generate content from another. We also
                 survey the use of methods in practice, that is, in
                 commercial or prototype games. Fourth and last, we
                 discuss several directions for future research in
                 PCG-G, which we believe deserve close attention in the
                 near future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Liu:2013:IRQ,
  author =       "Dong Liu and Shuicheng Yan and Rong-Rong Ji and
                 Xian-Sheng Hua and Hong-Jiang Zhang",
  title =        "Image retrieval with query-adaptive hashing",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422958",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Hashing-based approximate nearest-neighbor search may
                 well realize scalable content-based image retrieval.
                 The existing semantic-preserving hashing methods
                 leverage the labeled data to learn a fixed set of
                 semantic-aware hash functions. However, a fixed hash
                 function set is unable to well encode all semantic
                 information simultaneously, and ignores the specific
                 user's search intention conveyed by the query. In this
                 article, we propose a query-adaptive hashing method
                 which is able to generate the most appropriate binary
                 codes for different queries. Specifically, a set of
                 semantic-biased discriminant projection matrices are
                 first learnt for each of the semantic concepts, through
                 which a semantic-adaptable hash function set is learnt
                 via a joint sparsity variable selection model. At query
                 time, we further use the sparsity representation
                 procedure to select the most appropriate hash function
                 subset that is informative to the semantic information
                 conveyed by the query. Extensive experiments over three
                 benchmark image datasets well demonstrate the
                 superiority of our proposed query-adaptive hashing
                 method over the state-of-the-art ones in terms of
                 retrieval accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zheng:2013:GSD,
  author =       "Yan-Tao Zheng and Shuicheng Yan and Zheng-Jun Zha and
                 Yiqun Li and Xiangdong Zhou and Tat-Seng Chua and
                 Ramesh Jain",
  title =        "{GPSView}: a scenic driving route planner",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422959",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "GPS devices have been widely used in automobiles to
                 compute navigation routes to destinations. The
                 generated driving route targets the minimal traveling
                 distance, but neglects the sightseeing experience of
                 the route. In this study, we propose an augmented GPS
                 navigation system, GPSView, to incorporate a scenic
                 factor into the routing. The goal of GPSView is to plan
                 a driving route with scenery and sightseeing qualities,
                 and therefore allow travelers to enjoy sightseeing on
                 the drive. To do so, we first build a database of
                 scenic roadways with vistas of landscapes and sights
                 along the roadside. Specifically, we adapt an
                 attention-based approach to exploit
                 community-contributed GPS-tagged photos on the Internet
                 to discover scenic roadways. The premise is: a
                 multitude of photos taken along a roadway imply that
                 this roadway is probably appealing and catches the
                 public's attention. By analyzing the geospatial
                 distribution of photos, the proposed approach discovers
                 the roadside sight spots, or Points-Of-Interest (POIs),
                 which have good scenic qualities and visibility to
                 travelers on the roadway. Finally, we formulate scenic
                 driving route planning as an optimization task towards
                 the best trade-off between sightseeing experience and
                 traveling distance. Testing in the northern California
                 area shows that the proposed system can deliver
                 promising results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zhou:2013:SMV,
  author =       "Wengang Zhou and Houqiang Li and Yijuan Lu and Qi
                 Tian",
  title =        "{SIFT} match verification by geometric coding for
                 large-scale partial-duplicate {Web} image search",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422960",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Most large-scale image retrieval systems are based on
                 the bag-of-visual-words model. However, the traditional
                 bag-of-visual-words model does not capture the
                 geometric context among local features in images well,
                 which plays an important role in image retrieval. In
                 order to fully explore geometric context of all visual
                 words in images, efficient global geometric
                 verification methods have been attracting lots of
                 attention. Unfortunately, current existing methods on
                 global geometric verification are either
                 computationally expensive to ensure real-time response,
                 or cannot handle rotation well. To solve the preceding
                 problems, in this article, we propose a novel geometric
                 coding algorithm, to encode the spatial context among
                 local features for large-scale partial-duplicate Web
                 image retrieval. Our geometric coding consists of
                 geometric square coding and geometric fan coding, which
                 describe the spatial relationships of SIFT features
                 into three geo-maps for global verification to remove
                 geometrically inconsistent SIFT matches. Our approach
                 is not only computationally efficient, but also
                 effective in detecting partial-duplicate images with
                 rotation, scale changes, partial-occlusion, and
                 background clutter. Experiments in partial-duplicate
                 Web image search, using two datasets with one million
                 Web images as distractors, reveal that our approach
                 outperforms the baseline bag-of-visual-words approach
                 even following a RANSAC verification in mean average
                 precision. Besides, our approach achieves comparable
                 performance to other state-of-the-art global geometric
                 verification methods, for example, spatial coding
                 scheme, but is more computationally efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Park:2013:ISL,
  author =       "Jong-Seung Park and Ramesh Jain",
  title =        "Identification of scene locations from geotagged
                 images",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422961",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Due to geotagging capabilities of consumer cameras, it
                 has become easy to capture the exact geometric location
                 where a picture is taken. However, the location is not
                 the whereabouts of the scene taken by the photographer
                 but the whereabouts of the photographer himself. To
                 determine the actual location of an object seen in a
                 photo some sophisticated and tiresome steps are
                 required on a special camera rig, which are generally
                 not available in common digital cameras. This article
                 proposes a novel method to determine the geometric
                 location corresponding to a specific image pixel. A new
                 technique of stereo triangulation is introduced to
                 compute the relative depth of a pixel position.
                 Geographical metadata embedded in images are utilized
                 to convert relative depths to absolute coordinates.
                 When a geographic database is available we can also
                 infer the semantically meaningful description of a
                 scene object from where the specified pixel is
                 projected onto the photo. Experimental results
                 demonstrate the effectiveness of the proposed approach
                 in accurately identifying actual locations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Wang:2013:RAA,
  author =       "Yichuan Wang and Ting-An Lin and Cheng-Hsin Hsu and
                 Xin Liu",
  title =        "Region- and action-aware virtual world clients",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422962",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We propose region- and action-aware virtual world
                 clients. To develop such clients, we present a
                 parameterized network traffic model, based on a large
                 collection of Second Life traces gathered by us. Our
                 methodology is also applicable to virtual worlds other
                 than Second Life. With the traffic model, various
                 optimization criteria can be adopted, including visual
                 quality, response time, and energy consumption. We use
                 energy consumption as the show case, and demonstrate
                 via trace-driven simulations that, compared to two
                 existing schemes, a mobile client can save up to 36\%
                 and 41\% communication energy by selectively turning on
                 its WiFi network interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Khodabakhshi:2013:SSF,
  author =       "Naghmeh Khodabakhshi and Mohamed Hefeeda",
  title =        "{Spider}: a system for finding {$3$D} video copies",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422963",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel content-based copy
                 detection system for 3D videos. The system creates
                 compact and robust depth and visual signatures from the
                 3D videos. Then, signature of a query video is compared
                 against an indexed database of reference videos'
                 signatures. The system returns a score, using both
                 spatial and temporal characteristics of videos,
                 indicating whether the query video matches any video in
                 the reference video database, and in case of matching,
                 which portion of the reference video matches the query
                 video. Analysis shows that the system is efficient,
                 both computationally and storage-wise. The system can
                 be used, for example, by video content owners, video
                 hosting sites, and third-party companies to find
                 illegally copied 3D videos. We implemented Spider, a
                 complete realization of the proposed system, and
                 conducted rigorous experiments on it. Our experimental
                 results show that the proposed system can achieve high
                 accuracy in terms of precision and recall even if the
                 3D videos are subjected to several transformations at
                 the same time. For example, the proposed system yields
                 100\% precision and recall when copied videos are parts
                 of original videos, and more than 90\% precision and
                 recall when copied videos are subjected to different
                 individual transformations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Abrams:2013:WAG,
  author =       "Austin Abrams and Robert Pless",
  title =        "{Web}-accessible geographic integration and
                 calibration of webcams",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422964",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A global network of webcams offers unique viewpoints
                 from tens of thousands of locations. Understanding the
                 geographic context of this imagery is vital in using
                 these cameras for quantitative environmental monitoring
                 or surveillance applications. We derive robust
                 geo-calibration constraints that allow users to
                 geo-register static or pan-tilt-zoom cameras by
                 specifying a few corresponding points, and describe our
                 Web interface suitable for novices. We discuss design
                 decisions that support our scalable, publicly
                 accessible Web service that allows webcam textures to
                 be displayed live on 3D geographic models. Finally, we
                 demonstrate several multimedia applications for
                 geo-calibrated cameras.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Steinmetz:2013:EN,
  author =       "Ralf Steinmetz",
  title =        "Editorial note",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "31:1--31:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523001.2523002",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Nahrstedt:2013:ISS,
  author =       "Klara Nahrstedt and Rainer Lienhart and Malcolm
                 Slaney",
  title =        "Introduction to the special section on the 20th
                 anniversary of the {ACM International Conference on
                 Multimedia}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "32:1--32:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523001.2523003",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Li:2013:TDI,
  author =       "Baochun Li and Zhi Wang and Jiangchuan Liu and Wenwu
                 Zhu",
  title =        "Two decades of {Internet} video streaming: a
                 retrospective view",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "33:1--33:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2505805",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "For over two decades, video streaming over the
                 Internet has received a substantial amount of attention
                 from both academia and industry. Starting from the
                 design of transport protocols for streaming video,
                 research interests have later shifted to the
                 peer-to-peer paradigm of designing streaming protocols
                 at the application layer. More recent research has
                 focused on building more practical and scalable
                 systems, using Dynamic Adaptive Streaming over HTTP. In
                 this article, we provide a retrospective view of the
                 research results over the past two decades, with a
                 focus on peer-to-peer streaming protocols and the
                 effects of cloud computing and social media.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Huang:2013:ETM,
  author =       "Zixia Huang and Klara Nahrstedt and Ralf Steinmetz",
  title =        "Evolution of temporal multimedia synchronization
                 principles: a historical viewpoint",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "34:1--34:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490821",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The evolution of multimedia applications has
                 drastically changed human life and behaviors. New
                 communication technologies lead to new requirements for
                 multimedia synchronization. This article presents a
                 historical view of temporal synchronization studies
                 focusing on continuous multimedia. We demonstrate how
                 the development of multimedia systems has created new
                 challenges for synchronization technologies. We
                 conclude with a new application-dependent,
                 multilocation, multirequirement synchronization
                 framework to address these new challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Bulterman:2013:SAM,
  author =       "Dick C. A. Bulterman and Pablo Cesar and Rodrigo
                 Laiola Guimar{\~a}es",
  title =        "Socially-aware multimedia authoring: {Past}, present,
                 and future",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "35:1--35:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2491893",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Creating compelling multimedia productions is a
                 nontrivial task. This is as true for creating
                 professional content as it is for nonprofessional
                 editors. During the past 20 years, authoring networked
                 content has been a part of the research agenda of the
                 multimedia community. Unfortunately, authoring has been
                 seen as an initial enterprise that occurs before `real'
                 content processing takes place. This limits the options
                 open to authors and to viewers of rich multimedia
                 content for creating and receiving focused, highly
                 personal media presentations. This article reflects on
                 the history of multimedia authoring. We focus on the
                 particular task of supporting socially-aware
                 multimedia, in which the relationships within
                 particular social groups among authors and viewers can
                 be exploited to create highly personal media
                 experiences. We provide an overview of the requirements
                 and characteristics of socially-aware multimedia
                 authoring within the context of exploiting community
                 content. We continue with a short historical
                 perspective on authoring support for these types of
                 situations. We then present an overview of a current
                 system for supporting socially-aware multimedia
                 authoring within the community content. We conclude
                 with a discussion of the issues that we feel can
                 provide a fruitful basis for future multimedia
                 authoring support. We argue that providing support for
                 socially-aware multimedia authoring can have a profound
                 impact on the nature and architecture of the entire
                 multimedia information processing pipeline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zhang:2013:IST,
  author =       "Lei Zhang and Yong Rui",
  title =        "Image search---from thousands to billions in 20
                 years",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "36:1--36:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490823",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a comprehensive review and
                 analysis on image search in the past 20 years,
                 emphasizing the challenges and opportunities brought by
                 the astonishing increase of dataset scales from
                 thousands to billions in the same time period, which
                 was witnessed first-hand by the authors as active
                 participants in this research area. Starting with a
                 retrospective review of three stages of image search in
                 the history, the article highlights major breakthroughs
                 around the year 2000 in image search features, indexing
                 methods, and commercial systems, which marked the
                 transition from stage two to stage three. Subsequent
                 sections describe the image search research from four
                 important aspects: system framework, feature extraction
                 and image representation, indexing, and big data's
                 potential. Based on the review, the concluding section
                 discusses open research challenges and suggests future
                 research directions in effective visual representation,
                 image knowledge base construction, implicit user
                 feedback and crowdsourcing, mobile image search, and
                 creative multimedia interfaces.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Rowe:2013:LFY,
  author =       "Lawrence A. Rowe",
  title =        "Looking forward 10 years to multimedia successes",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "37:1--37:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490825",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A panel at ACM Multimedia 2012 addressed research
                 successes in the past 20 years. While the panel focused
                 on the past, this article discusses successes since the
                 ACM SIGMM 2003 Retreat and suggests research directions
                 in the next ten years. While significant progress has
                 been made, more research is required to allow
                 multimedia to impact our everyday computing
                 environment. The importance of hardware changes on
                 future research directions is discussed. We believe
                 ubiquitous computing---meaning abundant computation and
                 network bandwidth---should be applied in novel ways to
                 solve multimedia grand challenges and continue the IT
                 revolution of the past century.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Shenoy:2013:MSR,
  author =       "Prashant Shenoy",
  title =        "Multimedia systems research: {The} first twenty years
                 and lessons for the next twenty",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "38:1--38:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490859",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This retrospective article examines the past two
                 decades of multimedia systems research through the lens
                 of three research topics that were in vogue in the
                 early days of the field and offers perspectives on the
                 evolution of these research topics. We discuss the
                 eventual impact of each line of research and offer
                 lessons for future research in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Hua:2013:OVD,
  author =       "Kien A. Hua",
  title =        "Online video delivery: {Past}, present, and future",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "39:1--39:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502435",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video streaming is the core technology for online
                 video delivery systems. Initial research on this
                 technology faced many challenges. In this article,
                 lessons learned from beginning trials are discussed;
                 some pioneering works that provided early solutions and
                 inspired subsequent research are presented; and new
                 techniques required for emerging applications are
                 examined.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Swaminathan:2013:WMV,
  author =       "Viswanathan Swaminathan",
  title =        "Are we in the middle of a video streaming
                 revolution?",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "40:1--40:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490826",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "It has been roughly 20 years since the beginning of
                 video streaming over the Internet. Until very recently,
                 video streaming experiences left much to be desired.
                 Over the last few years, this has significantly
                 improved, making monetization of streaming possible.
                 Recently, there has been an explosion of commercial
                 video delivery services over the Internet, sometimes
                 referred to as over-the-top (OTT) delivery. All these
                 services invariably use streaming technologies.
                 Initially, streaming had all the promise, then for a
                 long time, it was download and play, later progressive
                 download for short content, and now it is streaming
                 again. Did streaming win the download versus streaming
                 contest? Did the best technology win? The improvement
                 in streaming experience has been possible through a
                 variety of new streaming technologies, some proprietary
                 and others extensions to standard protocols. The
                 primary delivery mechanism for entertainment video,
                 both premium content like movies and user generated
                 content (UGC), tends to be HTTP streaming. Is HTTP
                 streaming the panacea for all problems? The goal of
                 this article is to give an industry perspective of what
                 fundamentally changed in video streaming that makes it
                 commercially viable now. This article outlines how a
                 blend of technology choices between download and
                 streaming makes the current wave of ubiquitous
                 streaming possible for entertainment video delivery.
                 After identifying problems that still need to be
                 solved, the article concludes with the lessons learnt
                 from the video streaming evolution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chou:2013:AIC,
  author =       "Philip A. Chou",
  title =        "Advances in immersive communication: (1) {Telephone},
                 (2) {Television}, (3) {Teleportation}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "41:1--41:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2492704",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The last great advances in immersive communication
                 were the invention of the telephone over 137 years ago
                 and the invention of the video telephone (n{\'e}
                 television) over 86 years ago. However, a perfect storm
                 is brewing for the next advance in immersive
                 communication, thanks to the convergence of massive
                 amounts of computation, bandwidth, resolution, new
                 sensors, and new displays. It could well be the
                 Multimedia community that turns this brew into the next
                 great advance in immersive communication, something
                 akin to teleportation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chang:2013:HFW,
  author =       "Shih-Fu Chang",
  title =        "How far we've come: {Impact} of 20 years of multimedia
                 information retrieval",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "42:1--42:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2491844",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article reviews the major research trends that
                 emerged in the last two decades within the broad area
                 of multimedia information retrieval, with a focus on
                 the ACM Multimedia community. Trends are defined
                 (nonscientifically) to be topics that appeared in ACM
                 multimedia publications and have had a significant
                 number of citations. The article also assesses the
                 impacts of these trends on real-world applications. The
                 views expressed are subjective and likely biased but
                 hopefully useful for understanding the heritage of the
                 community and stimulating new research direction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Effelsberg:2013:PLB,
  author =       "Wolfgang Effelsberg",
  title =        "A personal look back at twenty years of research in
                 multimedia content analysis",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "43:1--43:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502434",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This paper is a personal look back at twenty years of
                 research in multimedia content analysis. It addresses
                 the areas of audio, photo and video analysis for the
                 purpose of indexing and retrieval from the perspective
                 of a multimedia researcher. Whereas a general analysis
                 of content is impossible due to the personal bias of
                 the user, significant progress was made in the
                 recognition of specific objects or events. The paper
                 concludes with a brief outlook on the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Hanjalic:2013:MRM,
  author =       "Alan Hanjalic",
  title =        "Multimedia retrieval that matters",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490827",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article emphasizes the need to refocus multimedia
                 information retrieval (MIR) research towards bridging
                 the utility gap, the gap between the expected and
                 de facto usefulness of MIR solutions. This requires us
                 to revisit the notion of relevance, but also to
                 consider other criteria for assessing MIR solutions,
                 like the informativeness of the retrieved results and
                 how helpful they are for the users. The article also
                 states that this focus shift cannot be realized
                 incrementally, but by revisiting the foundations of MIR
                 solutions, that is, by a utility-by-design approach. In
                 this respect, a number of research challenges are
                 proposed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Turk:2013:TYE,
  author =       "Matthew Turk",
  title =        "Over twenty years of eigenfaces",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490824",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The inaugural ACM Multimedia Conference coincided with
                 a surge of interest in computer vision technologies for
                 detecting and recognizing people and their activities
                 in images and video. Face recognition was the first of
                 these topics to broadly engage the vision and
                 multimedia research communities. The Eigenfaces
                 approach was, deservedly or not, the method that
                 captured much of the initial attention, and it
                 continues to be taught and used as a benchmark over 20
                 years later. This article is a brief personal view of
                 the genesis of Eigenfaces for face recognition and its
                 relevance to the multimedia community.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Whitman:2013:CSF,
  author =       "Brian Whitman",
  title =        "Care and scale: {Fifteen} years of music retrieval",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2492703",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The co-founder of The Echo Nest, a music intelligence
                 company that now powers recommendation and discovery
                 for most music services, discusses the notion of care
                 and scale, cultural analysis of music, a brief history
                 of music retrieval, and how and why The Echo Nest got
                 started.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Szeliski:2013:NWC,
  author =       "Richard Szeliski and Noah Snavely and Steven M.
                 Seitz",
  title =        "Navigating the worldwide community of photos",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2492208",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The last decade has seen an explosion in the number of
                 photographs available on the Internet. The sheer volume
                 of interesting photos makes it a challenge to explore
                 this space. Various Web and social media sites, along
                 with search and indexing techniques, have been
                 developed in response. One natural way to navigate
                 these images is in a 3D geo-located context. In this
                 article, we reflect on our work in this area, with a
                 focus on techniques that build partial 3D scene models
                 to help find and navigate interesting photographs in an
                 interactive, immersive 3D setting. We also discuss how
                 finding such relationships among photographs opens up
                 exciting new possibilities for multimedia authoring,
                 visualization, and editing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Andre:2013:EUU,
  author =       "Elisabeth Andr{\'e}",
  title =        "Exploiting unconscious user signals in multimodal
                 human-computer interaction",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "48:1--48:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502433",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents the idea of empathic stimulation
                 that relies on the power and potential of unconsciously
                 conveyed attentive and emotional information to
                 facilitate human-machine interaction. Starting from a
                 historical review of related work presented at past ACM
                 Multimedia conferences, we discuss challenges that
                 arise when exploiting unconscious human signals for
                 empathic stimulation, such as the real-time analysis of
                 psychological user states and the smooth adaptation of
                 the human-machine interface based on this analysis. A
                 classical application field that might benefit from the
                 idea of unconscious human-computer interaction is the
                 exploration of massive datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Sundaram:2013:EMS,
  author =       "Hari Sundaram",
  title =        "Experiential media systems",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "49:1--49:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502432",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a personalized narrative on the
                 early discussions within the Multimedia community and
                 the subsequent research on experiential media systems.
                 I discuss two different research initiatives---design of
                 real-time, immersive multimedia feedback environments
                 for stroke rehabilitation; exploratory environments for
                 events that exploited the user's ability to make
                 connections. I discuss the issue of foundations: the
                 question of multisensory integration and
                 superadditivity; the need for identification of
                 ``first-class'' Multimedia problems; expanding the
                 scope of Multimedia research.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Kompatsiaris:2013:ISS,
  author =       "Ioannis (Yiannis) Kompatsiaris and Wenjun (Kevin) Zeng
                 and Gang Hua and Liangliang Cao",
  title =        "Introduction to the special section of best papers of
                 {ACM Multimedia} 2012",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "50:1--50:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523001.2523004",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Liu:2013:RAM,
  author = {Heng Liu and Tao Mei and Houqiang Li and Jiebo Luo and
    Shipeng Li},
  title = {Robust and accurate mobile visual localization and its
    applications},
  journal = j-TOMCCAP,
  volume = {9},
  number = {1s},
  pages = {51:1--51:??},
  month = oct,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2491735},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:45 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Mobile applications are becoming increasingly popular.
    More and more people are using their phones to enjoy
    ubiquitous location-based services (LBS). The
    increasing popularity of LBS creates a fundamental
    problem: mobile localization. Besides traditional
    localization methods that use GPS or wireless signals,
    using phone-captured images for localization has drawn
    significant interest from researchers. Photos contain
    more scene context information than the embedded
    sensors, leading to a more precise location
    description. With the goal being to accurately sense
    real geographic scene contexts, this article presents a
    novel approach to mobile visual localization according
    to a given image (typically associated with a rough GPS
    position). The proposed approach is capable of
    providing a complete set of more accurate parameters
    about the scene geo-context including the real
    locations of both the mobile user and perhaps more
    importantly the captured scene, as well as the viewing
    direction. To figure out how to make image localization
    quick and accurate, we investigate various techniques
    for large-scale image retrieval and 2D-to-3D matching.
    Specifically, we first generate scene clusters using
    joint geo-visual clustering, with each scene being
    represented by a reconstructed 3D model from a set of
    images. The 3D models are then indexed using a visual
    vocabulary tree structure. Taking geo-tags of the
    database image as prior knowledge, a novel
    location-based codebook weighting scheme proposed to
    embed this additional information into the codebook.
    The discriminative power of the codebook is enhanced,
    thus leading to better image retrieval performance. The
    query image is aligned with the models obtained from
    the image retrieval results, and eventually registered
    to a real-world map. We evaluate the effectiveness of
    our approach using several large-scale datasets and
    achieving estimation accuracy of a user's location
    within 13 meters, viewing direction within 12 degrees,
    and viewing distance within 26 meters. Of particular
    note is our showcase of three novel applications based
    on localization results: (1) an on-the-spot tour guide,
    (2) collaborative routing, and (3) a sight-seeing
    guide. The evaluations through user studies demonstrate
    that these applications are effective in facilitating
    the ideal rendezvous for mobile users.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {51},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Wang:2013:PBS,
  author = {Zhi Wang and Wenwu Zhu and Xiangwen Chen and Lifeng
    Sun and Jiangchuan Liu and Minghua Chen and Peng Cui
    and Shiqiang Yang},
  title = {Propagation-based social-aware multimedia content
    distribution},
  journal = j-TOMCCAP,
  volume = {9},
  number = {1s},
  pages = {52:1--52:??},
  month = oct,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2523001.2523005},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:45 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Online social networks have reshaped how multimedia
    contents are generated, distributed, and consumed on
    today's Internet. Given the massive number of
    user-generated contents shared in online social
    networks, users are moving to directly access these
    contents in their preferred social network services. It
    is intriguing to study the service provision of social
    contents for global users with satisfactory quality of
    experience. In this article, we conduct large-scale
    measurement of a real-world online social network
    system to study the social content propagation. We have
    observed important propagation patterns, including
    social locality, geographical locality, and temporal
    locality. Motivated by the measurement insights, we
    propose a propagation-based social-aware delivery
    framework using a hybrid edge-cloud and peer-assisted
    architecture. We also design replication strategies for
    the architecture based on three propagation predictors
    designed by jointly considering user, content, and
    context information. In particular, we design a
    propagation region predictor and a global audience
    predictor to guide how the edge-cloud servers backup
    the contents, and a local audience predictor to guide
    how peers cache the contents for their friends. Our
    trace-driven experiments further demonstrate the
    effectiveness and superiority of our design.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {52},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Sang:2013:SIA,
  author = {Jitao Sang and Changsheng Xu},
  title = {Social influence analysis and application on
    multimedia sharing websites},
  journal = j-TOMCCAP,
  volume = {9},
  number = {1s},
  pages = {53:1--53:??},
  month = oct,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2502436},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:45 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Social media is becoming popular these days, where
    users necessarily interact with each other to form
    social networks. Influence network, as one special case
    of social network, has been recognized as significantly
    impacting social activities and user decisions. We
    emphasize in this article that the inter-user influence
    is essentially topic-sensitive, as for different tasks
    users tend to trust different influencers and be
    influenced most by them. While existing research
    focuses on global influence modeling and applies to
    text-based networks, this work investigates the problem
    of topic-sensitive influence modeling in the multimedia
    domain. According to temporal data justification, we
    propose a multimodal probabilistic model, considering
    both users' textual annotation and uploaded visual
    images. This model is capable of simultaneously
    extracting user topic distributions and topic-sensitive
    influence strengths. By identifying the topic-sensitive
    influencer, we are able to conduct applications, like
    collective search and collaborative recommendation. A
    risk minimization-based general framework for
    personalized image search is further presented, where
    the image search task is transferred to measure the
    distance of image and personalized query language
    models. The framework considers the noisy tag issue and
    enables easy incorporation of social influence. We have
    conducted experiments on a large-scale Flickr dataset.
    Qualitative as well as quantitative evaluation results
    have validated the effectiveness of the topic-sensitive
    influencer mining model, and demonstrated the advantage
    of incorporating topic-sensitive influence in
    personalized image search and topic-based image
    recommendation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {53},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Silva:2013:HPH,
  author = {Juan M. Silva and Mauricio Orozco and Jongeun Cha and
    Abdulmotaleb {El Saddik} and Emil M. Petriu},
  title = {Human perception of haptic-to-video and
    haptic-to-audio skew in multimedia applications},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {9:1--9:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457451},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The purpose of this research is to assess the
    sensitivity of humans to perceive asynchrony among
    media signals coming from a computer application.
    Particularly we examine haptic-to-video and
    haptic-to-audio skew. For this purpose we have designed
    an experimental setup, where users are exposed to a
    basic multimedia presentation resembling a ping-pong
    game. For every collision between a ball and a racket,
    the user is able to perceive auditory, visual, and
    haptic cues about the collision event. We artificially
    introduce negative and positive delay to the auditory
    and visual cues with respect to the haptic stream. We
    subjectively evaluate the perception of inter-stream
    asynchrony perceived by the users using two types of
    haptic devices. The statistical results of our
    evaluation show perception rates of around 100 ms
    regardless of modality and type of device.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {9},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Bhatt:2013:RPB,
  author = {Chidansh A. Bhatt and Pradeep K. Atrey and Mohan S.
    Kankanhalli},
  title = {A reward-and-punishment-based approach for concept
    detection using adaptive ontology rules},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {10:1--10:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457452},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Despite the fact that performance improvements have
    been reported in the last years, semantic concept
    detection in video remains a challenging problem.
    Existing concept detection techniques, with ontology
    rules, exploit the static correlations among primitive
    concepts but not the dynamic spatiotemporal
    correlations. The proposed method rewards (or punishes)
    detected primitive concepts using dynamic
    spatiotemporal correlations of the given ontology rules
    and updates these ontology rules based on the accuracy
    of detection. Adaptively learned ontology rules
    significantly help in improving the overall accuracy of
    concept detection as shown in the experimental
    result.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {10},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Alsulaiman:2013:IVB,
  author = {Fawaz A. Alsulaiman and Nizar Sakr and Julio J.
    Vald{\'e}s and Abdulmotaleb {El Saddik}},
  title = {Identity verification based on handwritten signatures
    with haptic information using genetic programming},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {11:1--11:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457453},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {In this article, haptic-based handwritten signature
    verification using Genetic Programming (GP)
    classification is presented. A comparison of GP-based
    classification with classical classifiers including
    support vector machine, $k$-nearest neighbors,
    na{\"\i}ve Bayes, and random forest is conducted. In
    addition, the use of GP in discovering small
    knowledge-preserving subsets of features in
    high-dimensional datasets of haptic-based signatures is
    investigated and several approaches are explored.
    Subsets of features extracted from GP-generated models
    (analytic functions) are also exploited to determine
    the importance and relevance of different haptic data
    types (e.g., force, position, torque, and orientation)
    in user identity verification. The results revealed
    that GP classifiers compare favorably with the
    classical methods and use a much fewer number of
    attributes (with simple function sets).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {11},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Zhang:2013:MAS,
  author = {Qianni Zhang and Ebroul Izquierdo},
  title = {Multifeature analysis and semantic context learning
    for image classification},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {12:1--12:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457454},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {This article introduces an image classification
    approach in which the semantic context of images and
    multiple low-level visual features are jointly
    exploited. The context consists of a set of semantic
    terms defining the classes to be associated to
    unclassified images. Initially, a multiobjective
    optimization technique is used to define a multifeature
    fusion model for each semantic class. Then, a Bayesian
    learning procedure is applied to derive a context model
    representing relationships among semantic classes.
    Finally, this context model is used to infer object
    classes within images. Selected results from a
    comprehensive experimental evaluation are reported to
    show the effectiveness of the proposed approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {12},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Zhao:2013:MEU,
  author = {Zhen Wei Zhao and Sameer Samarth and Wei Tsang Ooi},
  title = {Modeling the effect of user interactions on mesh-based
    {P2P VoD} streaming systems},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {13:1--13:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457455},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {User interactions such as seeks and pauses are widely
    supported by existing Peer-to-Peer Video-on-Demand (P2P
    VoD) streaming systems. Their effect on the streaming
    system, however, has not been well studied. Seeks cause
    peers to skip part of the video, making them stay in
    the system for shorter time, and thus contribute less.
    On the other hand, only part of the video is downloaded
    due to seeks, reducing peers' demand from the system.
    It is unclear which factor dominates the effect of
    seeks on the streaming system. Pauses during playback,
    on one hand, allow peers to stay longer in the system
    and upload more content. When interleaved with seeks,
    however, long pauses may increase peers' demand
    unnecessarily as peers may download content that will
    eventually be skipped by subsequent forward seeks. The
    collective effect of seeks and pauses, together with
    the known random peer departure, is unintuitive and
    needs to be addressed properly so as to understand the
    effect of human factors on the streaming system
    performance. In this article, we develop an analytical
    model to both qualitatively and quantitatively study
    the effect of seeks and pauses on mesh-based P2P VoD
    streaming systems, in particular, the effect on the
    server cost. Our model can help in understanding how
    human factors such as seeks and pauses affect the
    streaming system performance, tuning a P2P VoD system
    towards better system performance and stability, and
    providing a framework for capacity planning.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {13},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Yang:2013:ETT,
  author = {Yang Yang and Yi Yang and Heng Tao Shen},
  title = {Effective transfer tagging from image to video},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {14:1--14:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457456},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Recent years have witnessed a great explosion of
    user-generated videos on the Web. In order to achieve
    an effective and efficient video search, it is critical
    for modern video search engines to associate videos
    with semantic keywords automatically. Most of the
    existing video tagging methods can hardly achieve
    reliable performance due to deficiency of training
    data. It is noticed that abundant well-tagged data are
    available in other relevant types of media (e.g.,
    images). In this article, we propose a novel video
    tagging framework, termed as Cross-Media Tag Transfer
    (CMTT), which utilizes the abundance of well-tagged
    images to facilitate video tagging. Specifically, we
    build a ``cross-media tunnel'' to transfer knowledge
    from images to videos. To this end, an optimal kernel
    space, in which distribution distance between images
    and video is minimized, is found to tackle the
    domain-shift problem. A novel cross-media video tagging
    model is proposed to infer tags by exploring the
    intrinsic local structures of both labeled and
    unlabeled data, and learn reliable video classifiers.
    An efficient algorithm is designed to optimize the
    proposed model in an iterative and alternative way.
    Extensive experiments illustrate the superiority of our
    proposal compared to the state-of-the-art algorithms.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {14},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Zhao:2013:AAP,
  author = {Zhen Wei Zhao and Wei Tsang Ooi},
  title = {{APRICOD}: an access-pattern-driven distributed
    caching middleware for fast content discovery of
    noncontinuous media access},
  journal = j-TOMCCAP,
  volume = {9},
  number = {2},
  pages = {15:1--15:??},
  month = may,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2457450.2457457},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:48 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/hash.bib;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Content discovery is a major source of latency in
    peer-to-peer (P2P) media streaming systems, especially
    in the presence of noncontinuous user access, such as
    random seek in Video-on-Demand (VoD) streaming and
    teleportation in a Networked Virtual Environment (NVE).
    After the aforementioned user interactions, streaming
    systems often need to initiate the content discovery
    process to identify where to retrieve the requested
    media objects. Short content lookup latency is demanded
    to ensure smooth user experience. Existing content
    discovery systems based on either a Distributed Hash
    Table (DHT) or gossip mechanism cannot cope with
    noncontinuous access efficiently due to their long
    lookup latency. In this work, we propose an
    access-pattern-driven distributed caching middleware
    named APRICOD, which caters for fast and scalable
    content discovery in peer-to-peer media streaming
    systems, especially when user interactions are present.
    APRICOD exploits correlations among media objects
    accessed by users, and adapts to shift in the user
    access pattern automatically. We first present a
    general APRICOD design that can be used with any
    existing content discovery system. We then present an
    implementation of APRICOD on top of Pastry, which we
    use to evaluate APRICOD. Our evaluation in a 1024-node
    system, using a Second Life trace with 5,735 users and
    a VoD trace with 54 users, shows that APRICOD can
    effectively resolve all continuous access queries with
    a single hop deterministically with node failure as an
    exception, and resolve noncontinuous access queries
    with a single hop with high probability.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {15},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Anonymous:2013:CPM,
  author = {Anonymous},
  title = {Call for papers: {Multiple} sensorial {(MulSeMedia)}
    multi-modal media: {Advances} and applications},
  journal = j-TOMCCAP,
  volume = {9},
  number = {3},
  pages = {15:1--15:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2487268.2500818},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L = {1551-6857},
  bibdate = {Thu Mar 13 07:37:50 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {15},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Mei:2013:NLS,
  author =       "Tao Mei and Lin-Xie Tang and Jinhui Tang and
                 Xian-Sheng Hua",
  title =        "Near-lossless semantic video summarization and its
                 applications to video analysis",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487269",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The ever increasing volume of video content on the Web
                 has created profound challenges for developing
                 efficient indexing and search techniques to manage
                 video data. Conventional techniques such as video
                 compression and summarization strive for the two
                 commonly conflicting goals of low storage and high
                 visual and semantic fidelity. With the goal of
                 balancing both video compression and summarization,
                 this article presents a novel approach, called
                 Near-Lossless Semantic Summarization (NLSS), to
                 summarize a video stream with the least high-level
                 semantic information loss by using an extremely small
                 piece of metadata. The summary consists of compressed
                 image and audio streams, as well as the metadata for
                 temporal structure and motion information. Although at
                 a very low compression rate (around $ 1 / 40 $ of
                 H.264 baseline, where traditional compression
                 techniques can hardly preserve an acceptable visual
                 fidelity), the proposed NLSS still can be applied to
                 many video-oriented tasks, such as visualization,
                 indexing and browsing, duplicate detection, concept
                 detection, and so on. We evaluate the NLSS on TRECVID
                 and other video collections, and demonstrate that it is
                 a powerful tool for significantly reducing storage
                 consumption, while keeping high-level semantic
                 fidelity.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ademoye:2013:IRT,
  author =       "Oluwakemi A. Ademoye and Gheorghita Ghinea",
  title =        "Information recall task impact in olfaction-enhanced
                 multimedia",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487270",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Enhancing multimedia applications with olfactory
                 sensations is one of the last challenges in the area.
                 While there is evidence, both scientific and anecdotal,
                 that olfactory cues help users in information recall
                 tasks, there is a lack of work when the targeted
                 information is one contained in a multimedia
                 presentation, which is precisely the focus of this
                 article. Accordingly, we present the results of two
                 experimental studies. The first study measured the
                 impact of olfactory media variation on the user's
                 ability to perceive, synthesize, and analyze the
                 informational content of olfactory-enhanced multimedia
                 videos; the second study measured the impact of
                 information content, and an information recall task in
                 respect of user perception of the relevance, sense of
                 reality, and acceptability of the olfactory media
                 content, as well as the overall enjoyment of the
                 experience. Results show that the use of olfactory
                 media content, both pleasant and unpleasant, in
                 multimedia displays does not significantly impact on
                 information assimilation in a negative way. Moreover,
                 the addition of a performance task may enhance the
                 user's understanding of the correlation between the
                 characteristic odor(s) and the scenario under
                 consideration, as well as enable users to consciously
                 learn the odors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Yeh:2013:CAS,
  author =       "Lo-Yao Yeh and Jiun-Long Huang",
  title =        "A conditional access system with efficient key
                 distribution and revocation for mobile pay-{TV}
                 systems",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "18:1--18:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487271",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Current mobile pay-TV systems have two types of
                 Conditional Access Systems (CAS): group-key-based and
                 public-key systems. The best feature of group-key-based
                 systems is the ability to enjoy the broadcast nature in
                 delivery multimedia contents, while the major advantage
                 of public-key systems is consolidating the security
                 foundation to withstand various attacks, such as
                 collusion attacks. However, the problems of
                 group-key-based systems include collusion attacks, lack
                 of nonrepudiation, and troublesome key distribution.
                 Even worse, the benefit of broadcast efficiency is
                 confined to a group size of no more than 512
                 subscribers. For public-key systems, the poor delivery
                 scalability is the major shortcoming because the unique
                 private key feature is only suitable for one-to-one
                 delivery. In this article, we introduce a scalable
                 access control scheme to integrate the merits of
                 broadcasting regardless of group size and sound
                 security assurance, including fine-grained access
                 control and collusion attack resistance. For subscriber
                 revocation, a single message is broadcast to the other
                 subscribers to get the updated key, thus significantly
                 boosting subscriber revocation scalability. Due to
                 mobile subscribers' dynamic movements, this article
                 also analyzes the benefit of retransmission cases in
                 our system. Through the performance evaluation and
                 functionality comparison, the proposed scheme should be
                 a decent candidate to enhance the security strength and
                 transmission efficiency in a mobile pay-TV system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Naskar:2013:GTL,
  author =       "Ruchira Naskar and Rajat Subhra Chakraborty",
  title =        "A generalized tamper localization approach for
                 reversible watermarking algorithms",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487272",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In general reversible watermarking algorithms, the
                 convention is to reject the entire cover image at the
                 receiver end if it fails authentication, since there is
                 no way to detect the exact locations of tampering. This
                 feature may be exploited by an adversary to bring about
                 a form of DoS attack. Here we provide a solution to
                 this problem in form of a tamper localization mechanism
                 for reversible watermarking algorithms, which allows
                 selective rejection of distorted cover image regions in
                 case of authentication failure, thus avoiding rejection
                 of the complete image. Additionally it minimizes the
                 bandwidth requirement of the communication channel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Doherty:2013:SSA,
  author =       "Jonathan Doherty and Kevin Curran and Paul Mckevitt",
  title =        "A self-similarity approach to repairing large dropouts
                 of streamed music",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "20:1--20:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487273",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Enjoyment of audio has now become about flexibility
                 and personal freedom. Digital audio content can be
                 acquired from many sources and wireless networking
                 allows digital media devices and associated peripherals
                 to be unencumbered by wires. However, despite recent
                 improvements in capacity and quality of service,
                 wireless networks are inherently unreliable
                 communications channels for the streaming of audio,
                 being susceptible to the effects of range,
                 interference, and occlusion. This time-varying
                 reliability of wireless audio transfer introduces data
                 corruption and loss, with unpleasant audible effects
                 that can be profound and prolonged in duration.
                 Traditional communications techniques for error
                 mitigation perform poorly and in a bandwidth
                 inefficient manner in the presence of such large-scale
                 defects in a digital audio stream. A novel solution
                 that can complement existing techniques takes account
                 of the semantics and natural repetition of music.
                 Through the use of self-similarity metadata, missing or
                 damaged audio segments can be seamlessly replaced with
                 similar undamaged segments that have already been
                 successfully received. We propose a technology to
                 generate relevant self-similarity metadata for
                 arbitrary audio material and to utilize this metadata
                 within a wireless audio receiver to provide
                 sophisticated and real-time correction of large-scale
                 errors. The primary objectives are to match the current
                 section of a song being received with previous sections
                 while identifying incomplete sections and determining
                 replacements based on previously received portions of
                 the song. This article outlines our approach to Forward
                 Error Correction (FEC) technology that is used to
                 ``repair'' a bursty dropout when listening to
                 time-dependent media on a wireless network. Using
                 self-similarity analysis on a music file, we can
                 ``automatically'' repair the dropout with a similar
                 portion of the music already received thereby
                 minimizing a listener's discomfort.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ho:2013:IPC,
  author =       "Edmond S. L. Ho and Jacky C. P. Chan and Taku Komura
                 and Howard Leung",
  title =        "Interactive partner control in close interactions for
                 real-time applications",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "3",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487268.2487274",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:50 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a new framework for synthesizing
                 motion of a virtual character in response to the
                 actions performed by a user-controlled character in
                 real time. In particular, the proposed method can
                 handle scenes in which the characters are closely
                 interacting with each other such as those in partner
                 dancing and fighting. In such interactions,
                 coordinating the virtual characters with the human
                 player automatically is extremely difficult because the
                 system has to predict the intention of the player
                 character. In addition, the style variations from
                 different users affect the accuracy in recognizing the
                 movements of the player character when determining the
                 responses of the virtual character. To solve these
                 problems, our framework makes use of the spatial
                 relationship-based representation of the body parts
                 called interaction mesh, which has been proven
                 effective for motion adaptation. The method is
                 computationally efficient, enabling real-time character
                 control for interactive applications. We demonstrate
                 its effectiveness and versatility in synthesizing a
                 wide variety of motions with close interactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Steinmetz:2013:ER,
  author =       "Ralf Steinmetz",
  title =        "Editorial: Reviewers",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "22:1--22:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501644",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Sakai:2013:PPC,
  author =       "Kazuya Sakai and Wei-Shinn Ku and Min-Te Sun and Roger
                 Zimmermann",
  title =        "Privacy preserving continuous multimedia streaming in
                 {MANETs}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "23:1--23:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501645",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "At present, mobile devices are prevalent with end
                 users and continuous media streaming services in mobile
                 ad-hoc networks (MANETs) support popular applications.
                 It is required for applications that stream isochronous
                 media that the network link be continuously available.
                 In this study, we introduce two group-server scheduling
                 schemes to improve link continuity: static group-server
                 scheduling and dynamic group-server scheduling. With
                 our solution, if one of the current links between a
                 client and a server instance breaks, the client can
                 still download the multimedia content from another
                 scheduled server peer. In addition, we incorporate the
                 data link layer constraints as well as privacy concerns
                 into our protocol design. The simulation results show
                 that the proposed schemes significantly improve the
                 effective link duration, overall system performance,
                 and degree of privacy in MANETs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Dong:2013:RIA,
  author =       "Jian Dong and Bin Cheng and Xiangyu Chen and Tat-Seng
                 Chua and Shuicheng Yan and Xi Zhou",
  title =        "Robust image annotation via simultaneous feature and
                 sample outlier pursuit",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "24:1--24:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501646",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Graph-based semi-supervised image annotation has
                 achieved great success in a variety of studies, yet it
                 essentially and intuitively suffers from both the
                 irrelevant/noisy features (referred to as feature
                 outliers) and the unusual/corrupted samples (referred
                 to as sample outliers). In this work, we investigate
                 how to derive robust sample affinity matrix via
                 simultaneous feature and sample outlier pursuit. This
                 task is formulated as a Dual-outlier and Prior-driven
                 Low-Rank Representation (DP-LRR) problem, which
                 possesses convexity in objective function. In DP-LRR,
                 the clean data are assumed to be self-reconstructible
                 with low-rank coefficient matrix as in LRR; while the
                 error matrix is decomposed as the sum of a row-wise
                 sparse matrix and a column-wise sparse matrix, the
                 $ l_{2, 1} $-norm minimization of which encourages the
                 pursuit of feature and sample outliers respectively.
                 The DP-LRR is further regularized by the priors from
                 side information, that is, the inhomogeneous data
                 pairs. An efficient iterative procedure based on
                 linearized alternating direction method is presented to
                 solve the DP-LRR problem, with closed-form solutions
                 within each iteration. The derived low-rank
                 reconstruction coefficient matrix is then fed into any
                 graph based semi-supervised label propagation algorithm
                 for image annotation, and as a by-product, the cleaned
                 data from DP-LRR can also be utilized as a better image
                 representation to generally boost image annotation
                 performance. Extensive experiments on MIRFlickr,
                 Corel30K, NUS-WIDE-LITE and NUS-WIDE databases well
                 demonstrate the effectiveness of the proposed
                 formulation for robust image annotation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Villanueva:2013:HMB,
  author =       "Arantxa Villanueva and Victoria Ponz and Laura
                 Sesma-Sanchez and Mikel Ariz and Sonia Porta and Rafael
                 Cabeza",
  title =        "Hybrid method based on topography for robust detection
                 of iris center and eye corners",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "25:1--25:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501647",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A multistage procedure to detect eye features is
                 presented. Multiresolution and topographic
                 classification are used to detect the iris center. The
                 eye corner is calculated combining valley detection and
                 eyelid curve extraction. The algorithm is tested in the
                 BioID database and in a proprietary database containing
                 more than 1200 images. The results show that the
                 suggested algorithm is robust and accurate. Regarding
                 the iris center our method obtains the best average
                 behavior for the BioID database compared to other
                 available algorithms. Additional contributions are that
                 our algorithm functions in real time and does not
                 require complex post processing stages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Wang:2013:ECR,
  author =       "Bo Wang and Jinqiao Wang and Hanqing Lu",
  title =        "Exploiting content relevance and social relevance for
                 personalized ad recommendation on {Internet TV}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "26:1--26:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501648",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "There have been not many interactions between the two
                 dominant forms of mass communication: television and
                 the Internet, while nowadays the appearance of Internet
                 television makes them more closely. Different with
                 traditional TV in a passive mode of transmission,
                 Internet TV makes it more possible to make personalized
                 service recommendation because of the interactivity
                 between users and the Internet. In this article, we
                 introduce a scheme to provide targeted ad
                 recommendation to Internet TV users by exploiting the
                 content relevance and social relevance. First, we
                 annotate TV videos in terms of visual content analysis
                 and textual analysis by aligning visual and textual
                 information. Second, with user-user, video-video and
                 user-video relationships, we employ Multi-Relationship
                 based Probabilistic Matrix Factorization (MRPMF) to
                 learn representative tags for modeling user preference.
                 And then semantic content relevance (between product/ad
                 and TV video) and social relevance (between product/ad
                 and user interest) are calculated by projecting the
                 corresponding tags into our advertising concept space.
                 Finally, with relevancy scores we make ranking for
                 relevant product/ads to effectively provide users
                 personalized recommendation. The experimental results
                 demonstrate attractiveness and effectiveness of our
                 proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Alam:2013:MHB,
  author =       "Kazi Masudul Alam and Abu Saleh Md Mahfujur Rahman and
                 Abdulmotaleb {El Saddik}",
  title =        "Mobile haptic e-book system to support {$3$D}
                 immersive reading in ubiquitous environments",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "27:1--27:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501649",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In order to leverage the use of various modalities
                 such as audio-visual materials in instilling effective
                 learning behavior we present an intuitive approach of
                 annotation based hapto-audio-visual interaction with
                 the traditional digital learning materials such as
                 e-books. By integrating the home entertainment system
                 in the user's reading experience combined with haptic
                 interfaces we want to examine whether such augmentation
                 of modalities influence the user's learning patterns.
                 The proposed Haptic E-Book (HE-Book) system leverages
                 the haptic jacket, haptic arm band as well as haptic
                 sofa interfaces to receive haptic emotive signals
                 wirelessly in the form of patterned vibrations of the
                 actuators and expresses the learning material by
                 incorporating image, video, 3D environment based
                 augmented display in order to pave ways for intimate
                 reading experience in the popular mobile e-book
                 platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Nguyen:2013:TDA,
  author =       "Tam V. Nguyen and Si Liu and Bingbing Ni and Jun Tan
                 and Yong Rui and Shuicheng Yan",
  title =        "Towards decrypting attractiveness via multi-modality
                 cues",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "28:1--28:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501650",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Decrypting the secret of beauty or attractiveness has
                 been the pursuit of artists and philosophers for
                 centuries. To date, the computational model for
                 attractiveness estimation has been actively explored in
                 computer vision and multimedia community, yet with the
                 focus mainly on facial features. In this article, we
                 conduct a comprehensive study on female attractiveness
                 conveyed by single/multiple modalities of cues, that
                 is, face, dressing and/or voice, and aim to discover
                 how different modalities individually and collectively
                 affect the human sense of beauty. To extensively
                 investigate the problem, we collect the Multi-Modality
                 Beauty (M$^2$B) dataset, which is annotated with
                 attractiveness levels converted from manual $k$-wise
                 ratings and semantic attributes of different
                 modalities. Inspired by the common consensus that
                 middle-level attribute prediction can assist
                 higher-level computer vision tasks, we manually labeled
                 many attributes for each modality. Next, a tri-layer
                 Dual-supervised Feature-Attribute-Task (DFAT) network
                 is proposed to jointly learn the attribute model and
                 attractiveness model of single/multiple modalities. To
                 remedy possible loss of information caused by
                 incomplete manual attributes, we also propose a novel
                 Latent Dual-supervised Feature-Attribute-Task (LDFAT)
                 network, where latent attributes are combined with
                 manual attributes to contribute to the final
                 attractiveness estimation. The extensive experimental
                 evaluations on the collected M$^2$B dataset well
                 demonstrate the effectiveness of the proposed DFAT and
                 LDFAT networks for female attractiveness prediction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Tang:2013:TOH,
  author =       "Jinhui Tang and Qiang Chen and Meng Wang and Shuicheng
                 Yan and Tat-Seng Chua and Ramesh Jain",
  title =        "Towards optimizing human labeling for interactive
                 image tagging",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "4",
  pages =        "29:1--29:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501643.2501651",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:51 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Interactive tagging is an approach that combines human
                 and computer to assign descriptive keywords to image
                 contents in a semi-automatic way. It can avoid the
                 problems in automatic tagging and pure manual tagging
                 by achieving a compromise between tagging performance
                 and manual cost. However, conventional research efforts
                 on interactive tagging mainly focus on sample selection
                 and models for tag prediction. In this work, we
                 investigate interactive tagging from a different
                 aspect. We introduce an interactive image tagging
                 framework that can more fully make use of human's
                 labeling efforts. That means, it can achieve a
                 specified tagging performance by taking less manual
                 labeling effort or achieve better tagging performance
                 with a specified labeling cost. In the framework,
                 hashing is used to enable a quick clustering of image
                 regions and a dynamic multiscale clustering labeling
                 strategy is proposed such that users can label a large
                 group of similar regions each time. We also employ a
                 tag refinement method such that several inappropriate
                 tags can be automatically corrected. Experiments on a
                 large dataset demonstrate the effectiveness of our
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Carbunar:2013:FNA,
  author = {Bogdan Carbunar and Rahul Potharaju and Michael Pearce
    and Venugopal Vasudevan and Michael Needham},
  title = {A framework for network aware caching for video on
    demand systems},
  journal = j-TOMCCAP,
  volume = {9},
  number = {4},
  pages = {30:1--30:??},
  month = aug,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2501643.2501652},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:51 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  note = {See errata \cite{Carbunar:2014:EFN}.},
  abstract = {Video on Demand (VoD) services allow users to select
    and locally consume remotely stored content. We
    investigate the use of caching to solve the scalability
    issues of several existing VoD providers. We propose
    metrics and goals that define the requirements of a
    caching framework for CDNs of VoD systems. Using data
    logs collected from Motorola equipment from Comcast VoD
    deployments we show that several classic caching
    solutions do not satisfy the proposed goals. We address
    this issue by developing novel techniques for
    predicting future values of several metrics of
    interest. We rely on computed predictions to define the
    penalty imposed on the system, both network and caching
    sites, when not storing individual items. We use item
    penalties to devise novel caching and static content
    placement strategies. We use the previously mentioned
    data logs to validate our solutions and show that they
    satisfy all the defined goals.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {30},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Li:2013:ENO,
  author = {Zechao Li and Jing Liu and Meng Wang and Changsheng Xu
    and Hanqing Lu},
  title = {Enhancing news organization for convenient retrieval
    and browsing},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {1:1--1:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2488732},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {To facilitate users to access news quickly and
    comprehensively, we design a news search and browsing
    system named GeoVisNews, in which the news elements of
    ``Where'', ``Who'', ``What'' and ``When'' are enhanced
    via news geo-localization, image enrichment and joint
    ranking, respectively. For news geo-localization, an
    Ordinal Correlation Consistent Matrix Factorization
    (OCCMF) model is proposed to maintain the relevance
    rankings of locations to a specific news document and
    simultaneously capture intra-relations among locations
    and documents. To visualize news, we develop a novel
    method to enrich news documents with appropriate web
    images. Specifically, multiple queries are first
    generated from news documents for image search, and
    then the appropriate images are selected from the
    collected web images by an intelligent fusion approach
    based on multiple features. Obtaining the geo-localized
    and image enriched news resources, we further employ a
    joint ranking strategy to provide relevant, timely and
    popular news items as the answer of user searching
    queries. Extensive experiments on a large-scale news
    dataset collected from the web demonstrate the superior
    performance of the proposed approaches over related
    methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {1},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Abstract note: the terms "music-context-based" and "contextual" were
%%% italicized in the publisher's text; the stray spaces that the lost
%%% italic markup left behind have been removed.
@Article{Knees:2013:SMS,
  author =       "Peter Knees and Markus Schedl",
  title =        "A survey of music similarity and recommendation from
                 music context data",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2542205.2542206",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this survey article, we give an overview of methods
                 for music similarity estimation and music
                 recommendation based on music context data. Unlike
                 approaches that rely on music content and have been
                 researched for almost two decades, music-context-based
                 (or contextual) approaches to music retrieval are a
                 quite recent field of research within music information
                 retrieval (MIR). Contextual data refers to all
                 music-relevant information that is not included in the
                 audio signal itself. In this article, we focus on
                 contextual aspects of music primarily accessible
                 through web technology. We discuss different sources of
                 context-based data for individual music pieces and for
                 music artists. We summarize various approaches for
                 constructing similarity measures based on the
                 collaborative or cultural knowledge incorporated into
                 these data sources. In particular, we identify and
                 review three main types of context-based similarity
                 approaches: text-retrieval-based approaches (relying on
                 web-texts, tags, or lyrics), co-occurrence-based
                 approaches (relying on playlists, page counts,
                 microblogs, or peer-to-peer-networks), and approaches
                 based on user ratings or listening habits. This article
                 elaborates the characteristics of the presented
                 context-based measures and discusses their strengths as
                 well as their weaknesses.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% Abstract note: the variable k in "k-partite" is set as inline math
%%% ($k$) so that it stays attached to its suffix; the extracted text had
%%% a stray space after the (formerly italic) variable.
@Article{Zhao:2013:DPO,
  author =       "Yi-Liang Zhao and Qiang Chen and Shuicheng Yan and
                 Tat-Seng Chua and Daqing Zhang",
  title =        "Detecting profilable and overlapping communities with
                 user-generated multimedia contents in {LBSNs}",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502415",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In location-based social networks (LBSNs), users
                 implicitly interact with each other by visiting places,
                 issuing comments and/or uploading photos. These
                 heterogeneous interactions convey the latent
                 information for identifying meaningful user groups,
                 namely social communities, which exhibit unique
                 location-oriented characteristics. In this work, we aim
                 to detect and profile social communities in LBSNs by
                 representing the heterogeneous interactions with a
                 multimodality nonuniform hypergraph. Here, the vertices
                 of the hypergraph are users, venues, textual comments
                 or photos and the hyperedges characterize the
                 $k$-partite heterogeneous interactions such as posting
                 certain comments or uploading certain photos while
                 visiting certain places. We then view each detected
                 social community as a dense subgraph within the
                 heterogeneous hypergraph, where the user community is
                 constructed by the vertices and edges in the dense
                 subgraph and the profile of the community is
                 characterized by the vertices related with venues,
                 comments and photos and their inter-relations. We
                 present an efficient algorithm to detect the overlapped
                 dense subgraphs, where the profile of each social
                 community is guaranteed to be available by constraining
                 the minimal number of vertices in each modality.
                 Extensive experiments on Foursquare data well validated
                 the effectiveness of the proposed framework in terms of
                 detecting meaningful social communities and uncovering
                 their underlying profiles in LBSNs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Bhatnagar:2013:SRI,
  author = {Gaurav Bhatnagar and Q. M. Jonathan Wu and Pradeep K.
    Atrey},
  title = {Secure randomized image watermarking based on singular
    value decomposition},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {4:1--4:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2542205.2542207},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {In this article, a novel logo watermarking scheme is
    proposed based on wavelet frame transform, singular
    value decomposition and automatic thresholding. The
    proposed scheme essentially rectifies the ambiguity
    problem in the SVD-based watermarking. The core idea is
    to randomly upscale the size of host image using
    reversible random extension transform followed by the
    embedding of logo watermark in the wavelet frame
    domain. After embedding, a verification phase is casted
    with the help of a binary watermark and toral
    automorphism. At the extraction end, the binary
    watermark is first extracted followed by the
    verification of watermarked image. The logo watermark
    is extracted if and only if the watermarked image is
    verified. The security, attack and comparative analysis
    confirm high security, efficiency and robustness of the
    proposed watermarking system.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {4},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Mou:2013:CBC,
  author = {Luntian Mou and Tiejun Huang and Yonghong Tian and
    Menglin Jiang and Wen Gao},
  title = {Content-based copy detection through multimodal
    feature representation and temporal pyramid matching},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {5:1--5:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2542205.2542208},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/hash.bib;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Content-based copy detection (CBCD) is drawing
    increasing attention as an alternative technology to
    watermarking for video identification and copyright
    protection. In this article, we present a comprehensive
    method to detect copies that are subjected to
    complicated transformations. A multimodal feature
    representation scheme is designed to exploit the
    complementarity of audio features, global and local
    visual features so that optimal overall robustness to a
    wide range of complicated modifications can be
    achieved. Meanwhile, a temporal pyramid matching
    algorithm is proposed to assemble frame-level
    similarity search results into sequence-level matching
    results through similarity evaluation over multiple
    temporal granularities. Additionally, inverted indexing
    and locality sensitive hashing (LSH) are also adopted
    to speed up similarity search. Experimental results
    over benchmarking datasets of TRECVID 2010 and 2009
    demonstrate that the proposed method outperforms other
    methods for most transformations in terms of copy
    detection accuracy. The evaluation results also suggest
    that our method can achieve competitive copy
    localization preciseness.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {5},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Chen:2013:LSM,
  author = {Xiangyu Chen and Yadong Mu and Hairong Liu and
    Shuicheng Yan and Yong Rui and Tat-Seng Chua},
  title = {Large-scale multilabel propagation based on efficient
    sparse graph construction},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {6:1--6:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2542205.2542209},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/hash.bib;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {With the popularity of photo-sharing websites, the
    number of web images has exploded into unseen
    magnitude. Annotating such large-scale data will cost
    huge amount of human resources and is thus
    unaffordable. Motivated by this challenging problem, we
    propose a novel sparse graph based multilabel
    propagation (SGMP) scheme for super large scale
    datasets. Both the efficacy and accuracy of the image
    annotation are further investigated under different
    graph construction strategies, where Gaussian noise and
    non-Gaussian sparse noise are simultaneously considered
    in the formulations of these strategies. Our proposed
    approach outperforms the state-of-the-art algorithms by
    focusing on: (1) For large-scale graph construction, a
    simple yet efficient LSH (Locality Sensitive
    Hashing)-based sparse graph construction scheme is
    proposed to speed up the construction. We perform the
    multilabel propagation on this hashing-based graph
    construction, which is derived with LSH approach
    followed by sparse graph construction within the
    individual hashing buckets; (2) To further improve the
    accuracy, we propose a novel sparsity induced scalable
    graph construction scheme, which is based on a general
    sparse optimization framework. Sparsity essentially
    implies a very strong prior: for large scale
    optimization, the values of most variables shall be
    zeros when the solution reaches the optimum. By
    utilizing this prior, the solutions of large-scale
    sparse optimization problems can be derived by solving
    a series of much smaller scale subproblems; (3) For
    multilabel propagation, different from the traditional
    algorithms that propagate over individual label
    independently, our proposed propagation first encodes
    the label information of an image as a unit label
    confidence vector and naturally imposes inter-label
    constraints and manipulates labels interactively. Then,
    the entire propagation problem is formulated on the
    concept of Kullback--Leibler divergence defined on
    probabilistic distributions, which guides the
    propagation of the supervision information. Extensive
    experiments on the benchmark dataset NUS-WIDE with 270k
    images and its lite version NUS-WIDE-LITE with 56k
    images well demonstrate the effectiveness and
    scalability of the proposed multi-label propagation
    scheme.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {6},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Houle:2013:API,
  author = {Michael E. Houle and Vincent Oria and Shin'ichi Satoh
    and Jichao Sun},
  title = {Annotation propagation in image databases using
    similarity graphs},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {7:1--7:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2487736},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The practicality of large-scale image indexing and
    querying methods depends crucially upon the
    availability of semantic information. The manual
    tagging of images with semantic information is in
    general very labor intensive, and existing methods for
    automated image annotation may not always yield
    accurate results. The aim of this paper is to reduce to
    a minimum the amount of human intervention required in
    the semantic annotation of images, while preserving a
    high degree of accuracy. Ideally, only one copy of each
    object of interest would be labeled manually, and the
    labels would then be propagated automatically to all
    other occurrences of the objects in the database. To
    this end, we propose an influence propagation strategy,
    SW-KProp, that requires no human intervention beyond
    the initial labeling of a subset of the images.
    SW-KProp distributes semantic information within a
    similarity graph defined on all images in the database:
    each image iteratively transmits its current label
    information to its neighbors, and then readjusts its
    own label according to the combined influences of its
    neighbors. SW-KProp influence propagation can be
    efficiently performed by means of matrix computations,
    provided that pairwise similarities of images are
    available. We also propose a variant of SW-KProp which
    enhances the quality of the similarity graph by
    selecting a reduced feature set for each prelabeled
    image and rebuilding its neighborhood. The performances
    of the SW-KProp method and its variant were evaluated
    against several competing methods on classification
    tasks for three image datasets: a handwritten digit
    dataset, a face dataset and a web image dataset. For
    the digit images, SW-KProp and its variant performed
    consistently better than the other methods tested. For
    the face and web images, SW-KProp outperformed its
    competitors for the case when the number of prelabeled
    images was relatively small. The performance was seen
    to improve significantly when the feature selection
    strategy was applied.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {7},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Mallik:2013:MOR,
  author = {Anupama Mallik and Hiranmay Ghosh and Santanu
    Chaudhury and Gaurav Harit},
  title = {{MOWL}: an ontology representation language for
    {Web}-based multimedia applications},
  journal = j-TOMCCAP,
  volume = {10},
  number = {1},
  pages = {8:1--8:??},
  month = dec,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2542205.2542210},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:53 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Several multimedia applications need to reason with
    concepts and their media properties in specific domain
    contexts. Media properties of concepts exhibit some
    unique characteristics that cannot be dealt with
    conceptual modeling schemes followed in the existing
    ontology representation and reasoning schemes. We have
    proposed a new perceptual modeling technique for
    reasoning with media properties observed in multimedia
    instances and the latent concepts. Our knowledge
    representation scheme uses a causal model of the world
    where concepts manifest in media properties with
    uncertainties. We introduce a probabilistic reasoning
    scheme for belief propagation across domain concepts
    through observation of media properties. In order to
    support the perceptual modeling and reasoning paradigm,
    we propose a new ontology language, Multimedia Web
    Ontology Language (MOWL). Our primary contribution in
    this article is to establish the need for the new
    ontology language and to introduce the semantics of its
    novel language constructs. We establish the generality
    of our approach with two disparate knowledge-intensive
    applications involving reasoning with media properties
    of concepts.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {8},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Deng:2014:DLB,
  author = {Yunhua Deng and Rynson W. H. Lau},
  title = {Dynamic load balancing in distributed virtual
    environments using heat diffusion},
  journal = j-TOMCCAP,
  volume = {10},
  number = {2},
  pages = {16:1--16:??},
  month = feb,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2499906},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Distributed virtual environments (DVEs) are attracting
    a lot of attention in recent years, due to the
    increasing popularity of online gaming and social
    networks. As the number of concurrent users of a DVE
    increases, a critical problem is on how the workload
    among multiple servers can be balanced in order to
    maintain real-time performance. Although a number of
    load balancing methods have been proposed, they either
    try to produce high quality load balancing results and
    become too slow or emphasize on efficiency and the load
    balancing results become less effective. In this
    article, we propose a new approach to address this
    problem based on heat diffusion. Our work has two main
    contributions. First, we propose a local and a global
    load balancing methods for DVEs based on heat
    diffusion. Second, we investigate two performance
    factors of the proposed methods, the convergence
    threshold and the load balancing interval. We have
    conducted a number of experiments to extensively
    evaluate the performance of the proposed methods. Our
    experimental results show that the proposed methods
    outperform existing methods in that our methods are
    effective in reducing server overloading while at the
    same time being efficient.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {16},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{She:2014:CID,
  author = {James She and Jon Crowcroft and Hao Fu and Flora Li},
  title = {Convergence of interactive displays with smart mobile
    devices for effective advertising: a survey},
  journal = j-TOMCCAP,
  volume = {10},
  number = {2},
  pages = {17:1--17:??},
  month = feb,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2557450},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The trend of replacing public static signages with
    digital displays creates opportunities for interactive
    display systems, which can be used in collaborative
    workspaces, social gaming platforms and advertising.
    Based on marketing communication concepts and existing
    models for consumer behavior, three stages, namely
    attraction, interaction and conation, are defined in
    this article to analyze the effectiveness of
    interactive display advertising. By reviewing various
    methods and strategies employed by existing systems
    with attraction, interaction and conation stages, this
    article concludes that smart mobile devices should be
    integrated as a component to increase the effectiveness
    of interactive displays as advertising tools. Future
    research challenges related to this topic are also
    discussed.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {17},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 2 (February 2014), article 18: the PyCASP
%%% Python framework for parallel multimedia content analysis.
@Article{Gonina:2014:SMC,
  author          = {Ekaterina Gonina and Gerald Friedland and Eric
                     Battenberg and Penporn Koanantakool and Michael
                     Driscoll and Evangelos Georganas and Kurt Keutzer},
  title           = {Scalable multimedia content analysis on parallel
                     platforms using {Python}},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {2},
  pages           = {18:1--18:??},
  month           = feb,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2517151},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/python.bib;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this new era dominated by consumer-produced media
                     there is a high demand for web-scalable solutions to
                     multimedia content analysis. A compelling approach to
                     making applications scalable is to explicitly map their
                     computation onto parallel platforms. However,
                     developing efficient parallel implementations and fully
                     utilizing the available resources remains a challenge
                     due to the increased code complexity, limited
                     portability and required low-level knowledge of the
                     underlying hardware. In this article, we present
                     PyCASP, a Python-based framework that automatically
                     maps computation onto parallel platforms from Python
                     application code to a variety of parallel platforms.
                     PyCASP is designed using a systematic, pattern-oriented
                     approach to offer a single software development
                     environment for multimedia content analysis
                     applications. Using PyCASP, applications can be
                     prototyped in a couple hundred lines of Python code and
                     automatically scale to modern parallel processors.
                     Applications written with PyCASP are portable to a
                     variety of parallel platforms and efficiently scale
                     from a single desktop Graphics Processing Unit (GPU) to
                     an entire cluster with a small change to application
                     code. To illustrate our approach, we present three
                     multimedia content analysis applications that use our
                     framework: a state-of-the-art speaker diarization
                     application, a content-based music recommendation
                     system based on the Million Song Dataset, and a video
                     event detection system for consumer-produced videos. We
                     show that across this wide range of applications, our
                     approach achieves the goal of automatic portability and
                     scalability while at the same time allowing easy
                     prototyping in a high-level language and efficient
                     performance of low-level optimized code.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 2 (February 2014), article 19: the
%%% DisplayCast many-to-many intranet screen sharing system.
@Article{Chandra:2014:HPM,
  author          = {Surendar Chandra and John Boreczky and Lawrence A.
                     Rowe},
  title           = {High performance many-to-many intranet screen sharing
                     with {DisplayCast}},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {2},
  pages           = {19:1--19:??},
  month           = feb,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2534328},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {DisplayCast is a many to many Intranet screen sharing
                     system. Its screen capture mechanism creates a sequence
                     of pixmap images of the screen updates. Prior systems
                     that used a similar approach were designed to operate
                     over constrained wide-area networks and did not exploit
                     the Intranet network conditions to achieve high capture
                     rates. First we empirically analyzed the screen
                     contents for a variety of scenarios. We showed that
                     screen updates were sporadic with long periods of
                     inactivity. When active, screens were updated at far
                     higher rates than was supported by earlier systems. The
                     mismatch was pronounced for interactive scenarios. Even
                     during active screen updates, the number of updated
                     pixels were frequently small. We showed that crucial
                     information can be lost if individual updates were
                     merged. When the available system resources could not
                     support high capture rates, we showed ways in which
                     updates can be effectively collapsed. Next, we
                     investigate compression mechanisms for streaming these
                     updates. Even while using a hardware encoder, lossy
                     compressors such as H.264 were unable to sustain high
                     frame rates. Though Zlib lossless compression operated
                     within the latency and compression rate requirements,
                     the compression efficiency was poor. By analyzing the
                     screen pixels, we developed a practical transformation
                     that significantly improved compression rates.
                     DisplayCast incorporates these observations. It shares
                     the processor and network resources required for screen
                     capture, compression and transmission with host
                     applications whose output needs to be shared.
                     DisplayCast is agile and uses faster processing
                     capability to achieve even higher performance. Our
                     system components operate natively in Windows 7, Mac OS
                     X and iOS and is deployed in a production setting.
                     DisplayCast is released under a New BSD License.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 2 (February 2014), article 20: data hiding
%%% through forged revision histories on collaborative writing platforms.
@Article{Lee:2014:NDH,
  author          = {Ya-Lin Lee and Wen-Hsiang Tsai},
  title           = {A new data hiding method via revision history records
                     on collaborative writing platforms},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {2},
  pages           = {20:1--20:??},
  month           = feb,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2534408},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {A new data hiding method via collaboratively-written
                     articles with forged revision history records on
                     collaborative writing platforms is proposed. The hidden
                     message is camouflaged as a stego-document consisting
                     of a stego-article and a revision history created
                     through a simulated process of collaborative writing.
                     The revisions are forged using a database constructed
                     by mining word sequences used in real cases from an
                     English Wikipedia XML dump. Four characteristics of
                     article revisions are identified and utilized to embed
                     secret messages, including the author of each revision,
                     the number of corrected word sequences, the content of
                     the corrected word sequences, and the word sequences
                     replacing the corrected ones. Related problems arising
                     in utilizing these characteristics for data hiding are
                     identified and solved skillfully, resulting in an
                     effective multiway method for hiding secret messages
                     into the revision history. To create more realistic
                     revisions, Huffman coding based on the word sequence
                     frequencies collected from Wikipedia is applied to
                     encode the word sequences. Good experimental results
                     show the feasibility of the proposed method.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 2 (February 2014), article 21: memory
%%% recall based video search (MRVS).
@Article{Yuan:2014:MRB,
  author          = {Jin Yuan and Yi-Liang Zhao and Huanbo Luan and Meng
                     Wang and Tat-Seng Chua},
  title           = {Memory recall based video search: Finding videos you
                     have seen before based on your memory},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {2},
  pages           = {21:1--21:??},
  month           = feb,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2534409},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {We often remember images and videos that we have seen
                     or recorded before but cannot quite recall the exact
                     venues or details of the contents. We typically have
                     vague memories of the contents, which can often be
                     expressed as a textual description and/or rough visual
                     descriptions of the scenes. Using these vague memories,
                     we then want to search for the corresponding videos of
                     interest. We call this ``Memory Recall based Video
                     Search'' (MRVS). To tackle this problem, we propose a
                     video search system that permits a user to input
                     his/her vague and incomplete query as a combination of
                     text query, a sequence of visual queries, and/or
                     concept queries. Here, a visual query is often in the
                     form of a visual sketch depicting the outline of scenes
                     within the desired video, while each corresponding
                     concept query depicts a list of visual concepts that
                     appears in that scene. As the query specified by users
                     is generally approximate or incomplete, we need to
                     develop techniques to handle this inexact and
                     incomplete specification by also leveraging on user
                     feedback to refine the specification. We utilize
                     several innovative approaches to enhance the automatic
                     search. First, we employ a visual query suggestion
                     model to automatically suggest potential visual
                     features to users as better queries. Second, we utilize
                     a color similarity matrix to help compensate for
                     inexact color specification in visual queries. Third,
                     we leverage on the ordering of visual queries and/or
                     concept queries to rerank the results by using a greedy
                     algorithm. Moreover, as the query is inexact and there
                     is likely to be only one or few possible answers, we
                     incorporate an interactive feedback loop to permit the
                     users to label related samples which are visually
                     similar or semantically close to the relevant sample.
                     Based on the labeled samples, we then propose
                     optimization algorithms to update visual queries and
                     concept weights to refine the search results. We
                     conduct experiments on two large-scale video datasets:
                     TRECVID 2010 and YouTube. The experimental results
                     demonstrate that our proposed system is effective for
                     MRVS tasks.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 2 (February 2014), article 22: boosted
%%% shared hashing for mixed image-keyword queries over multilabel images.
@Article{Liu:2014:MIK,
  author          = {Xianglong Liu and Yadong Mu and Bo Lang and Shih-Fu
                     Chang},
  title           = {Mixed image-keyword query adaptive hashing over
                     multilabel images},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {2},
  pages           = {22:1--22:??},
  month           = feb,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2540990},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Thu Mar 13 07:37:57 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/hash.bib;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article defines a new hashing task motivated by
                     real-world applications in content-based image
                     retrieval, that is, effective data indexing and
                     retrieval given mixed query (query image together with
                     user-provided keywords). Our work is distinguished from
                     state-of-the-art hashing research by two unique
                     features: (1) Unlike conventional image retrieval
                     systems, the input query is a combination of an
                     exemplar image and several descriptive keywords, and
                     (2) the input image data are often associated with
                     multiple labels. It is an assumption that is more
                     consistent with the realistic scenarios. The mixed
                     image-keyword query significantly extends traditional
                     image-based query and better explicates the user
                     intention. Meanwhile it complicates semantics-based
                     indexing on the multilabel data. Though several
                     existing hashing methods can be adapted to solve the
                     indexing task, unfortunately they all prove to suffer
                     from low effectiveness. To enhance the hashing
                     efficiency, we propose a novel scheme ``boosted shared
                     hashing''. Unlike prior works that learn the hashing
                     functions on either all image labels or a single label,
                     we observe that the hashing function can be more
                     effective if it is designed to index over an optimal
                     label subset. In other words, the association between
                     labels and hash bits are moderately sparse. The
                     sparsity of the bit-label association indicates greatly
                     reduced computation and storage complexities for
                     indexing a new sample, since only limited number of
                     hashing functions will become active for the specific
                     sample. We develop a Boosting style algorithm for
                     simultaneously optimizing both the optimal label
                     subsets and hashing functions in a unified formulation,
                     and further propose a query-adaptive retrieval
                     mechanism based on hash bit selection for mixed
                     queries, no matter whether or not the query words exist
                     in the training data. Moreover, we show that the
                     proposed method can be easily extended to the case
                     where the data similarity is gauged by nonlinear kernel
                     functions. Extensive experiments are conducted on
                     standard image benchmarks like CIFAR-10, NUS-WIDE and
                     a-TRECVID. The results validate both the sparsity of
                     the bit-label association and the convergence of the
                     proposed algorithm, and demonstrate that the proposed
                     hashing scheme achieves substantially superior
                     performances over state-of-the-art methods under the
                     same hash bit budget.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% Front-matter entry: table of contents for the online supplement
%%% (Volume 10, Number 1s), recorded here under volume 10, number 3.
%%% NOTE(review): articleno 22 (pages 22:1--22:??) is also used by
%%% Liu:2014:MIK in number 2 --- confirm the article number against the
%%% publisher's record before relying on it.
@Article{Anonymous:2014:TCO,
  author =       "Anonymous",
  title =        "Table of Contents: Online Supplement Volume 10, Number
                 1s",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "3",
  pages =        "22:1--22:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2602969",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Apr 15 12:20:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 23: user behavior
%%% study of an IPTV system offering both live TV and VoD.
@Article{Liu:2014:DUB,
  author =       "Ning Liu and Huajie Cui and S.-H. Gary Chan and
                 Zhipeng Chen and Yirong Zhuang",
  title =        "Dissecting User Behaviors for a Simultaneous Live and
                 {VoD IPTV} System",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "3",
  pages =        "23:1--23:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2568194",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Apr 15 12:20:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "IPTV services deployed nowadays often consist of both
                 live TV and Video-on-Demand (VoD), offered by the same
                 service provider to the same pool of users over the
                 same managed network. Understanding user behaviors in
                 such a setting is hence an important step for system
                 modelling and optimization. Previous studies on user
                 behavior on video services were on either live TV or
                 VoD. For the first time, we conduct an in-depth
                 large-scale behavior study for IPTV users offering
                 simultaneously live TV and VoD choices at the same
                 time. Our data is from the largest IPTV service
                 provider in China, offering hundreds of live channels
                 and hundreds of thousands of VoD files, with traces
                 covering more than 1.9 million users over a period of 5
                 months. This large dataset provides us a unique
                 opportunity to cross-compare user viewing behaviors for
                 these services on the same platform, and sheds valuable
                 insights on how users interact with such a simultaneous
                 system. Our results lead to new understanding on IPTV
                 user behaviors which have strong implications on system
                 design. For example, we find that the average holding
                 time for VoD is significantly longer than live TV. Live
                 TV users tend to surf more. However, if such channel
                 surfing is discounted, the holding times of both
                 services are not much different. While users in VoD
                 tend to view HD longer, channel popularity for live TV
                 is much less dependent on its video quality. In
                 contrast to some popular assumptions on user
                 interactivity, the transitions among live TV, VoD, and
                 offline modes are far from a Markov model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 24: DIP, distributed
%%% identification of polluters in peer-to-peer live streaming.
@Article{Gaeta:2014:DDI,
  author          = {Rossano Gaeta and Marco Grangetto and Lorenzo Bovio},
  title           = {{DIP}: {Distributed Identification of Polluters} in
                     {P2P} Live Streaming},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {3},
  pages           = {24:1--24:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2568223},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Peer-to-peer live streaming applications are
                     vulnerable to malicious actions of peers that
                     deliberately modify data to decrease or prevent the
                     fruition of the media (pollution attack). In this
                     article we propose DIP, a fully distributed, accurate,
                     and robust algorithm for the identification of
                     polluters. DIP relies on checks that are computed by
                     peers upon completing reception of all blocks composing
                     a data chunk. A check is a special message that
                     contains the set of peer identifiers that provided
                     blocks of the chunk as well as a bit to signal if the
                     chunk has been corrupted. Checks are periodically
                     transmitted by peers to their neighbors in the overlay
                     network; peers receiving checks use them to maintain a
                     factor graph. This graph is bipartite and an
                     incremental belief propagation algorithm is run on it
                     to compute the probability of a peer being a polluter.
                     Using a prototype deployed over PlanetLab we show by
                     extensive experimentation that DIP allows honest peers
                     to identify polluters with very high accuracy and
                     completeness, even when polluters collude to deceive
                     them. Furthermore, we show that DIP is efficient,
                     requiring low computational, communication, and storage
                     overhead at each peer.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {24},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 25: EStreamer, an
%%% energy-efficient multimedia delivery system for mobile devices.
@Article{Hoque:2014:SEM,
  author =       "Mohammad Ashraful Hoque and Matti Siekkinen and Jukka
                 K. Nurminen and Sasu Tarkoma and Mika Aalto",
  title =        "Saving Energy in Mobile Devices for On-Demand
                 Multimedia Streaming --- A Cross-Layer Approach",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "3",
  pages =        "25:1--25:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556942",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Apr 15 12:20:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article proposes a novel energy-efficient
                 multimedia delivery system called EStreamer. First, we
                 study the relationship between buffer size at the
                 client, burst-shaped TCP-based multimedia traffic, and
                 energy consumption of wireless network interfaces in
                 smartphones. Based on the study, we design and
                 implement EStreamer for constant bit rate and
                 rate-adaptive streaming. EStreamer can improve battery
                 lifetime by 3x, 1.5x, and 2x while streaming over
                 Wi-Fi, 3G, and 4G, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 26: Hamming
%%% embedding kernel for video semantic indexing.
@Article{Wang:2014:HEK,
  author          = {Feng Wang and Wan-Lei Zhao and Chong-Wah Ngo and
                     Bernard Merialdo},
  title           = {A {Hamming} Embedding Kernel with Informative
                     Bag-of-Visual Words for Video Semantic Indexing},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {3},
  pages           = {26:1--26:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2535938},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article, we propose a novel Hamming embedding
                     kernel with informative bag-of-visual words to address
                     two main problems existing in traditional BoW
                     approaches for video semantic indexing. First, Hamming
                     embedding is employed to alleviate the information loss
                     caused by SIFT quantization. The Hamming distances
                     between keypoints in the same cell are calculated and
                     integrated into the SVM kernel to better discriminate
                     different image samples. Second, to highlight the
                     concept-specific visual information, we propose to
                     weight the visual words according to their
                     informativeness for detecting specific concepts. We
                     show that our proposed kernels can significantly
                     improve the performance of concept detection.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 27: steganalysis of
%%% triangle meshes using discriminative feature vectors.
@Article{Yang:2014:MDF,
  author          = {Ying Yang and Ioannis Ivrissimtzis},
  title           = {Mesh Discriminative Features for {$3$D} Steganalysis},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {3},
  pages           = {27:1--27:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2535555},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {We propose a steganalytic algorithm for triangle
                     meshes, based on the supervised training of a
                     classifier by discriminative feature vectors. After a
                     normalization step, the triangle mesh is calibrated by
                     one step of Laplacian smoothing and then a feature
                     vector is computed, encoding geometric information
                     corresponding to vertices, edges and faces. For a given
                     steganographic or watermarking algorithm, we create a
                     training set containing unmarked meshes and meshes
                     marked by that algorithm, and train a classifier using
                     Quadratic Discriminant Analysis. The performance of the
                     proposed method was evaluated on six well-known
                     watermarking/steganographic schemes with satisfactory
                     accuracy rates.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP volume 10, number 3 (April 2014), article 28: fuzzy-logic
%%% Quality of Experience (QoE) model for haptic virtual environments.
@Article{Hamam:2014:QEM,
  author          = {Abdelwahab Hamam and Abdulmotaleb {El Saddik} and
                     Jihad Alja'am},
  title           = {A Quality of Experience Model for Haptic Virtual
                     Environments},
  journal         = j-TOMCCAP,
  volume          = {10},
  number          = {3},
  pages           = {28:1--28:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2540991},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Haptic-based Virtual Reality (VR) applications have
                     many merits. What is still obscure, from the designer's
                     perspective of these applications, is the experience
                     the users will undergo when they use the VR system.
                     Quality of Experience (QoE) is an evaluation metric
                     from the user's perspective that unfortunately has
                     received limited attention from the research community.
                     Assessing the QoE of VR applications reflects the
                     amount of overall satisfaction and benefits gained from
                     the application in addition to laying the foundation
                     for ideal user-centric design in the future. In this
                     article, we propose a taxonomy for the evaluation of
                     QoE for multimedia applications and in particular VR
                     applications. We model this taxonomy using a Fuzzy
                     logic Inference System (FIS) to quantitatively measure
                     the QoE of haptic virtual environments. We build and
                     test our FIS by conducting a users' study analysis to
                     evaluate the QoE of a haptic game application. Our
                     results demonstrate that the proposed FIS model
                     reflects the user's estimation of the application's
                     quality significantly with low error and hence is
                     suited for QoE evaluation.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP 10(3), April 2014, article 29.
@article{Botta:2014:PCI,
  author       = {Marco Botta and Davide Cavagnino and Victor Pomponiu},
  title        = {Protecting the Content Integrity of Digital Imagery
                 with Fidelity Preservation: An Improved Version},
  journal      = j-TOMCCAP,
  volume       = {10},
  number       = {3},
  pages        = {29:1--29:??},
  month        = apr,
  year         = {2014},
  coden        = {????},
  doi          = {https://doi.org/10.1145/2568224},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource    = {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {Fragile watermarking has attracted a lot of attention
                 in the last decade. An interesting approach, presented
                 in 2011 by Lin et al., results in very high quality of
                 the watermarked images. However, after a thorough
                 examination of the paper, a few improvements are
                 proposed in our revised version of the algorithm in
                 order to overcome some shortcomings. In particular,
                 changes to the pseudocode and modifications to deal
                 with pixel saturation are suggested, along with a way
                 to improve the scheme security. Finally, a deeper
                 analysis of the security is presented.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {29},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMCCAP 10(3), April 2014, article 30.
@article{Luo:2014:ICH,
  author       = {Da Luo and Weiqi Luo and Rui Yang and Jiwu Huang},
  title        = {Identifying Compression History of Wave Audio and Its
                 Applications},
  journal      = j-TOMCCAP,
  volume       = {10},
  number       = {3},
  pages        = {30:1--30:??},
  month        = apr,
  year         = {2014},
  coden        = {????},
  doi          = {https://doi.org/10.1145/2575978},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Tue Apr 15 12:20:53 MDT 2014},
  bibsource    = {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {Audio signal is sometimes stored and/or processed in
                 WAV (waveform) format without any knowledge of its
                 previous compression operations. To perform some
                 subsequent processing, such as digital audio forensics,
                 audio enhancement and blind audio quality assessment,
                 it is necessary to identify its compression history. In
                 this article, we will investigate how to identify a
                 decompressed wave audio that went through one of three
                 popular compression schemes, including MP3, WMA
                 (windows media audio) and AAC (advanced audio coding).
                 By analyzing the corresponding frequency coefficients,
                 including modified discrete cosine transform (MDCT) and
                 Mel-frequency cepstral coefficients (MFCCs), of those
                 original audio clips and their decompressed versions
                 with different compression schemes and bit rates, we
                 propose several statistics to identify the compression
                 scheme as well as the corresponding bit rate previously
                 used for a given WAV signal. The experimental results
                 evaluated on 8,800 audio clips with various contents
                 have shown the effectiveness of the proposed method. In
                 addition, some potential applications of the proposed
                 method are discussed.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {30},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% ====================================================================
%%% From the ACM Portal Web site: ``On 23rd May 2014, ACM TOMCCAP
%%% changed its acronym to ACM TOMM. This acronym change was the result
%%% of extensive discussions between the journal Editorial Board and
%%% SIGMM constituents dating back to 2011. This name change emphasizes
%%% the continued strong collaboration with the ACM Multimedia
%%% conference (ACMMM).''
%%% ====================================================================
%%% TOMM 10(4), June 2014, article 31.
@Article{Zhang:2014:CDM,
  author =       "Tianzhu Zhang and Changsheng Xu",
  title =        "Cross-Domain Multi-Event Tracking via {CO-PMHT}",
  journal =      j-TOMM,
  volume =       "10",
  number =       "4",
  pages =        "31:1--31:??",
  month =        jun,
  year =         "2014",
  DOI =          "https://doi.org/10.1145/2602633",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 8 11:32:58 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With the massive growth of events on the Internet,
                 efficient organization and monitoring of events becomes
                 a practical challenge. To deal with this problem, we
                 propose a novel CO-PMHT (CO-Probabilistic
                 Multi-Hypothesis Tracking) algorithm for cross-domain
                 multi-event tracking to obtain their informative
                 summary details and evolutionary trends over time. We
                 collect a large-scale dataset by searching keywords on
                 two domains (Google News and Flickr) and downloading
                 both images and textual content for an event. Given the
                 input data, our algorithm can track multiple events in
                 the two domains collaboratively and boost the tracking
                 performance. Specifically, the bridge between two
                 domains is a semantic posterior probability, that
                 avoids the domain gap. After tracking, we can visualize
                 the whole evolutionary process of the event over time
                 and mine the semantic topics of each event for deep
                 understanding and event prediction. The extensive
                 experimental evaluations on the collected dataset well
                 demonstrate the effectiveness of the proposed algorithm
                 for cross-domain multi-event tracking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 10(4), June 2014, article 32.
@article{Huang:2014:PVR,
  author       = {Qinghua Huang and Bisheng Chen and Jingdong Wang and
                 Tao Mei},
  title        = {Personalized Video Recommendation through Graph
                 Propagation},
  journal      = j-TOMM,
  volume       = {10},
  number       = {4},
  pages        = {32:1--32:??},
  month        = jun,
  year         = {2014},
  doi          = {https://doi.org/10.1145/2598779},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Fri Aug 8 11:32:58 MDT 2014},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {The rapid growth of the number of videos on the
                 Internet provides enormous potential for users to find
                 content of interest. However, the vast quantity of
                 videos also turns the finding process into a difficult
                 task. In this article, we address the problem of
                 providing personalized video recommendation for users.
                 Rather than only exploring the user-video bipartite
                 graph that is formulated using click information, we
                 first combine the clicks and queries information to
                 build a tripartite graph. In the tripartite graph, the
                 query nodes act as bridges to connect user nodes and
                 video nodes. Then, to further enrich the connections
                 between users and videos, three subgraphs between the
                 same kinds of nodes are added to the tripartite graph
                 by exploring content-based information (video tags and
                 textual queries). We propose an iterative propagation
                 algorithm over the enhanced graph to compute the
                 preference information of each user. Experiments
                 conducted on a dataset with 1,369 users, 8,765 queries,
                 and 17,712 videos collected from a commercial video
                 search engine demonstrate the effectiveness of the
                 proposed method.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {32},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 10(4), June 2014, article 33.
@Article{Li:2014:UVS,
  author =       "Haitao Li and Xu Cheng and Jiangchuan Liu",
  title =        "Understanding Video Sharing Propagation in Social
                 Networks: Measurement and Analysis",
  journal =      j-TOMM,
  volume =       "10",
  number =       "4",
  pages =        "33:1--33:??",
  month =        jun,
  year =         "2014",
  DOI =          "https://doi.org/10.1145/2594440",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 8 11:32:58 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Modern online social networking has drastically
                 changed the information distribution landscape.
                 Recently, video has become one of the most important
                 types of objects spreading among social networking
                 service users. The sheer and ever-increasing data
                 volume, the broader coverage, and the longer access
                 durations of video objects, however, present
                 significantly more challenges than other types of
                 objects. This article takes an initial step toward
                 understanding the unique characteristics of video
                 sharing propagation in social networks. Based on
                 real-world data traces from a large-scale online social
                 network, we examine the user behavior from diverse
                 aspects and identify different types of users involved
                 in video propagation. We closely investigate the
                 temporal distribution during propagation as well as the
                 typical propagation structures, revealing more details
                 beyond stationary coverage. We further extend the
                 conventional epidemic models to accommodate diverse
                 types of users and their probabilistic viewing and
                 sharing behaviors. The model, effectively capturing the
                 essentials of the propagation process, serves as a
                 valuable basis for such applications as workload
                 synthesis, traffic prediction, and resource provision
                 of video servers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 10(4), June 2014, article 34.
@article{Wang:2014:BCM,
  author       = {Zhiyu Wang and Peng Cui and Lexing Xie and Wenwu Zhu
                 and Yong Rui and Shiqiang Yang},
  title        = {Bilateral Correspondence Model for Words-and-Pictures
                 Association in Multimedia-Rich Microblogs},
  journal      = j-TOMM,
  volume       = {10},
  number       = {4},
  pages        = {34:1--34:??},
  month        = jun,
  year         = {2014},
  doi          = {https://doi.org/10.1145/2611388},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Fri Aug 8 11:32:58 MDT 2014},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {Nowadays, the amount of multimedia contents in
                 microblogs is growing significantly. More than 20\% of
                 microblogs link to a picture or video in certain large
                 systems. The rich semantics in microblogs provides an
                 opportunity to endow images with higher-level semantics
                 beyond object labels. However, this raises new
                 challenges for understanding the association between
                 multimodal multimedia contents in multimedia-rich
                 microblogs. Disobeying the fundamental assumptions of
                 traditional annotation, tagging, and retrieval systems,
                 pictures and words in multimedia-rich microblogs are
                 loosely associated and a correspondence between
                 pictures and words cannot be established. To address
                 the aforementioned challenges, we present the first
                 study analyzing and modeling the associations between
                 multimodal contents in microblog streams, aiming to
                 discover multimodal topics from microblogs by
                 establishing correspondences between pictures and words
                 in microblogs. We first use a data-driven approach to
                 analyze the new characteristics of the words, pictures,
                 and their association types in microblogs. We then
                 propose a novel generative model called the Bilateral
                 Correspondence Latent Dirichlet Allocation (BC-LDA)
                 model. Our BC-LDA model can assign flexible
                 associations between pictures and words and is able to
                 not only allow picture-word co-occurrence with
                 bilateral directions, but also single modal
                 association. This flexible association can best fit the
                 data distribution, so that the model can discover
                 various types of joint topics and generate pictures and
                 words with the topics accordingly. We evaluate this
                 model extensively on a large-scale real multimedia-rich
                 microblogs dataset. We demonstrate the advantages of
                 the proposed model in several application scenarios,
                 including image tagging, text illustration, and topic
                 discovery. The experimental results demonstrate that
                 our proposed model can significantly and consistently
                 outperform traditional approaches.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {34},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 10(4), June 2014, article 35.
@article{Lei:2014:FND,
  author       = {Yanqiang Lei and Guoping Qiu and Ligang Zheng and Jiwu
                 Huang},
  title        = {Fast Near-Duplicate Image Detection Using Uniform
                 Randomized Trees},
  journal      = j-TOMM,
  volume       = {10},
  number       = {4},
  pages        = {35:1--35:??},
  month        = jun,
  year         = {2014},
  doi          = {https://doi.org/10.1145/2602186},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Fri Aug 8 11:32:58 MDT 2014},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {Indexing structure plays an important role in the
                 application of fast near-duplicate image detection,
                 since it can narrow down the search space. In this
                 article, we develop a cluster of uniform randomized
                 trees (URTs) as an efficient indexing structure to
                 perform fast near-duplicate image detection. The main
                 contribution in this article is that we introduce
                 ``uniformity'' and ``randomness'' into the indexing
                 construction. The uniformity requires classifying the
                 object images into the same scale subsets. Such a
                 decision makes good use of the two facts in
                 near-duplicate image detection, namely: (1) the number
                 of categories is huge; (2) a single category usually
                 contains only a small number of images. Therefore, the
                 uniform distribution is very beneficial to narrow down
                 the search space and does not significantly degrade the
                 detection accuracy. The randomness is embedded into the
                 generation of feature subspace and projection
                 direction, improving the flexibility of indexing
                 construction. The experimental results show that the
                 proposed method is more efficient than the popular
                 locality-sensitive hashing and more stable and flexible
                 than the traditional KD-tree.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {35},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 10(4), June 2014, article 36.
@article{Yeh:2014:PPR,
  author       = {Che-Hua Yeh and Brian A. Barsky and Ming Ouhyoung},
  title        = {Personalized Photograph Ranking and Selection System
                 Considering Positive and Negative User Feedback},
  journal      = j-TOMM,
  volume       = {10},
  number       = {4},
  pages        = {36:1--36:??},
  month        = jun,
  year         = {2014},
  doi          = {https://doi.org/10.1145/2584105},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Fri Aug 8 11:32:58 MDT 2014},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {In this article, we propose a novel personalized
                 ranking system for amateur photographs. The proposed
                 framework treats the photograph assessment as a ranking
                 problem and we introduce the idea of personalized
                 ranking, which ranks photographs considering both their
                 aesthetic qualities and personal preferences.
                 Photographs are described using three types of
                 features: photo composition, color and intensity
                 distribution, and personalized features. An aesthetic
                 prediction model is learned from labeled photographs by
                 using the proposed image features and RBF-ListNet
                 learning algorithm. The experimental results show that
                 the proposed framework outperforms in the ranking
                 performance: a Kendall's tau value of 0.432 is
                 significantly higher than those obtained by the
                 features proposed in one of the state-of-the-art
                 approaches (0.365) and by learning based on support
                 vector regression (0.384). To realize personalization
                 in ranking, three approaches are proposed: the
                 feature-based approach allows users to select
                 photographs with specific rules, the example-based
                 approach takes the positive feedback from users to
                 rerank the photograph, and the list-based approach
                 takes both positive and negative feedback from users
                 into consideration. User studies indicate that all
                 three approaches are effective in both aesthetic and
                 personalized ranking.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {36},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 10(4), June 2014, article 37.
@article{Tan:2014:PVS,
  author       = {Song Tan and Yu-Gang Jiang and Chong-Wah Ngo},
  title        = {Placing Videos on a Semantic Hierarchy for Search
                 Result Navigation},
  journal      = j-TOMM,
  volume       = {10},
  number       = {4},
  pages        = {37:1--37:??},
  month        = jun,
  year         = {2014},
  doi          = {https://doi.org/10.1145/2578394},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Fri Aug 8 11:32:58 MDT 2014},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {Organizing video search results in a list view is
                 widely adopted by current commercial search engines,
                 which cannot support efficient browsing for complex
                 search topics that have multiple semantic facets. In
                 this article, we propose to organize video search
                 results in a highly structured way. Specifically,
                 videos are placed on a semantic hierarchy that
                 accurately organizes various facets of a given search
                 topic. To pick the most suitable videos for each node
                 of the hierarchy, we define and utilize three important
                 criteria: relevance, uniqueness, and diversity.
                 Extensive evaluations on a large YouTube video dataset
                 demonstrate the effectiveness of our approach.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {37},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 11(1), August 2014, article 1 (editorial note; no abstract).
@article{Steinmetz:2014:EN,
  author       = {Ralf Steinmetz},
  title        = {Editorial Note},
  journal      = j-TOMM,
  volume       = {11},
  number       = {1},
  pages        = {1:1--1:??},
  month        = aug,
  year         = {2014},
  coden        = {????},
  doi          = {https://doi.org/10.1145/2634234},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Mon Sep 1 12:38:22 MDT 2014},
  bibsource    = {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {1},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 11(1), August 2014, article 2.
@article{Liu:2014:SBA,
  author       = {Yong-Jin Liu and Cui-Xia Ma and Qiufang Fu and Xiaolan
                 Fu and Sheng-Feng Qin and Lexing Xie},
  title        = {A Sketch-Based Approach for Interactive Organization
                 of Video Clips},
  journal      = j-TOMM,
  volume       = {11},
  number       = {1},
  pages        = {2:1--2:??},
  month        = aug,
  year         = {2014},
  coden        = {????},
  doi          = {https://doi.org/10.1145/2645643},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Mon Sep 1 12:38:22 MDT 2014},
  bibsource    = {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {With the rapid growth of video resources, techniques
                 for efficient organization of video clips are becoming
                 appealing in the multimedia domain. In this article, a
                 sketch-based approach is proposed to intuitively
                 organize video clips by: (1) enhancing their narrations
                 using sketch annotations and (2) structurizing the
                 organization process by gesture-based free-form
                 sketching on touch devices. There are two main
                 contributions of this work. The first is a sketch
                 graph, a novel representation for the narrative
                 structure of video clips to facilitate content
                 organization. The second is a method to perform
                 context-aware sketch recommendation scalable to large
                 video collections, enabling common users to easily
                 organize sketch annotations. A prototype system
                 integrating the proposed approach was evaluated on the
                 basis of five different aspects concerning its
                 performance and usability. Two sketch searching
                 experiments showed that the proposed context-aware
                 sketch recommendation outperforms, in terms of accuracy
                 and scalability, two state-of-the-art sketch searching
                 methods. Moreover, a user study showed that the sketch
                 graph is consistently preferred over traditional
                 representations such as keywords and keyframes. The
                 second user study showed that the proposed approach is
                 applicable in those scenarios where the video annotator
                 and organizer were the same person. The third user
                 study showed that, for video content organization,
                 using sketch graph users took on average 1/3 less time
                 than using a mass-market tool Movie Maker and took on
                 average 1/4 less time than using a state-of-the-art
                 sketch alternative. These results demonstrated that the
                 proposed sketch graph approach is a promising video
                 organization tool.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {2},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 11(1), August 2014, article 3.
@Article{Huang:2014:CSA,
  author =       "Junshi Huang and Si Liu and Junliang Xing and Tao Mei
                 and Shuicheng Yan",
  title =        "Circle \& Search: Attribute-Aware Shoe Retrieval",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2632165",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Mon Sep 1 12:38:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Taking the shoe as a concrete example, we present an
                 innovative product retrieval system that leverages
                 object detection and retrieval techniques to support a
                 brand-new online shopping experience in this article.
                 The system, called Circle \& Search, enables users to
                 naturally indicate any preferred product by simply
                 circling the product in images as the visual query, and
                 then returns visually and semantically similar products
                 to the users. The system is characterized by
                 introducing attributes in both the detection and
                 retrieval of the shoe. Specifically, we first develop
                 an attribute-aware part-based shoe detection model. By
                 maintaining the consistency between shoe parts and
                 attributes, this shoe detector has the ability to model
                 high-order relations between parts and thus the
                 detection performance can be enhanced. Meanwhile, the
                 attributes of this detected shoe can also be predicted
                 as the semantic relations between parts. Based on the
                 result of shoe detection, the system ranks all the
                 shoes in the repository using an attribute refinement
                 retrieval model that takes advantage of query-specific
                 information and attribute correlation to provide an
                 accurate and robust shoe retrieval. To evaluate this
                 retrieval system, we build a large dataset with 17,151
                 shoe images, in which each shoe is annotated with 10
                 shoe attributes e.g., heel height, heel shape, sole
                 shape, etc. According to the experimental result and
                 the user study, our Circle \& Search system achieves
                 promising shoe retrieval performance and thus
                 significantly improves the users' online shopping
                 experience.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 11(1), August 2014, article 4.
@article{Guan:2014:TAV,
  author       = {Genliang Guan and Zhiyong Wang and Shaohui Mei and Max
                 Ott and Mingyi He and David Dagan Feng},
  title        = {A Top-Down Approach for Video Summarization},
  journal      = j-TOMM,
  volume       = {11},
  number       = {1},
  pages        = {4:1--4:??},
  month        = aug,
  year         = {2014},
  coden        = {????},
  doi          = {https://doi.org/10.1145/2632267},
  issn         = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l       = {1551-6857},
  bibdate      = {Mon Sep 1 12:38:22 MDT 2014},
  bibsource    = {http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract     = {While most existing video summarization approaches aim
                 to identify important frames of a video from either a
                 global or local perspective, we propose a top-down
                 approach consisting of scene identification and scene
                 summarization. For scene identification, we represent
                 each frame with global features and utilize a scalable
                 clustering method. We then formulate scene
                 summarization as choosing those frames that best cover
                 a set of local descriptors with minimal redundancy. In
                 addition, we develop a visual word-based approach to
                 make our approach more computationally scalable.
                 Experimental results on two benchmark datasets
                 demonstrate that our proposed approach clearly
                 outperforms the state-of-the-art.},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno    = {4},
  fjournal     = {ACM Transactions on Multimedia Computing,
                 Communications, and Applications},
  journal-url  = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Pazzi:2014:PPP,
  author =       "Richard W. Pazzi and Azzedine Boukerche",
  title =        "{PROPANE}: a Progressive Panorama Streaming Protocol
                 to Support Interactive {$3$D} Virtual Environment
                 Exploration on Graphics-Constrained Devices",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2602222",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Mon Sep 1 12:38:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Image-Based Rendering (IBR) has become widely known by
                 its relatively low requirements for generating new
                 scenes based on a sequence of reference images. This
                 characteristic of IBR shows a remarkable potential
                 impact in rendering complex 3D virtual environments on
                 graphics-constrained devices, such as head-mounted
                 displays, set-top boxes, media streaming devices, and
                 so on. If well exploited, IBR coupled with remote
                 rendering would enable the exploration of complex
                 virtual environments on these devices. However, remote
                 rendering requires the transmission of a large volume
                 of images. In addition, existing solutions consider
                 limited and/or deterministic navigation schemes as a
                 means of decreasing the volume of streamed data. This
                 article proposes the PROgressive PANorama StrEaming
                 protocol (PROPANE) to offer users a smoother virtual
                 navigation experience by prestreaming the imagery data
                 required to generate new views as the user wanders
                 within a 3D environment. PROPANE is based on a very
                 simple yet effective trigonometry model and uses a
                 strafe (lateral movement) technique to minimize the
                 delay between image updates at the client end. This
                 article introduces the concept of key partial
                 panoramas, namely panorama segments that cover
                 movements in any direction by simply strafing from an
                 appropriate key partial panorama and streaming the
                 amount of lost pixels. Therefore, PROPANE can provide a
                 constrained device with sufficient imagery data to
                 cover a future user's viewpoints, thereby minimizing
                 the impact of transmission delay and jitter. PROPANE
                 has been implemented and compared to two baseline
                 remote rendering schemes. The evaluation results show
                 that the proposed technique outperforms the selected
                 and closely related existing schemes by minimizing the
                 response time while not limiting the user to predefined
                 paths as opposed to previous protocols.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Wang:2014:FEM,
  author =       "Xiangyu Wang and Yong Rui and Mohan Kankanhalli",
  title =        "{Up-Fusion}: an Evolving Multimedia Fusion Method",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611777",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Mon Sep 1 12:38:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The amount of multimedia data on the Internet has
                 increased exponentially in the past few decades and
                 this trend is likely to continue. Multimedia content
                 inherently has multiple information sources, therefore
                 effective fusion methods are critical for data analysis
                 and understanding. So far, most of the existing fusion
                 methods are static with respect to time, making it
                 difficult for them to handle the evolving multimedia
                 content. To address this issue, in recent years,
                 several evolving fusion methods were proposed, however,
                 their requirements are difficult to meet, making them
                 useful only in limited applications. In this article,
                 we propose a novel evolving fusion method based on the
                 online portfolio selection theory. The proposed method
                 takes into account the correlation among different
                 information sources and evolves the fusion model when
                 new multimedia data is added. It performs effectively
                 on both crisp and soft decisions without requiring
                 additional context information. Extensive experiments
                 on concept detection and human detection tasks over the
                 TRECVID dataset and surveillance data have been
                 conducted and significantly better performance has been
                 obtained.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Wang:2014:EIP,
  author =       "Xinxi Wang and Yi Wang and David Hsu and Ye Wang",
  title =        "Exploration in Interactive Personalized Music
                 Recommendation: a Reinforcement Learning Approach",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2623372",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Mon Sep 1 12:38:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Current music recommender systems typically act in a
                 greedy manner by recommending songs with the highest
                 user ratings. Greedy recommendation, however, is
                 suboptimal over the long term: it does not actively
                 gather information on user preferences and fails to
                 recommend novel songs that are potentially interesting.
                 A successful recommender system must balance the needs
                 to explore user preferences and to exploit this
                 information for recommendation. This article presents a
                 new approach to music recommendation by formulating
                 this exploration-exploitation trade-off as a
                 reinforcement learning task. To learn user preferences,
                 it uses a Bayesian model that accounts for both audio
                 content and the novelty of recommendations. A
                 piecewise-linear approximation to the model and a
                 variational inference algorithm help to speed up
                 Bayesian inference. One additional benefit of our
                 approach is a single unified model for both music
                 recommendation and playlist generation. We demonstrate
                 the strong potential of the proposed approach with
                 simulation results and a user study.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Katti:2014:OEE,
  author =       "Harish Katti and Anoop Kolar Rajagopal and Mohan
                 Kankanhalli and Ramakrishnan Kalpathi",
  title =        "Online Estimation of Evolving Human Visual Interest",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2632284",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Mon Sep 1 12:38:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Regions in video streams attracting human interest
                 contribute significantly to human understanding of the
                 video. Being able to predict salient and informative
                 Regions of Interest (ROIs) through a sequence of eye
                 movements is a challenging problem. Applications such
                 as content-aware retargeting of videos to different
                 aspect ratios while preserving informative regions and
                 smart insertion of dialog (closed-caption text)
                 into the video stream can significantly be improved
                 using the predicted ROIs. We propose an interactive
                 human-in-the-loop framework to model eye movements and
                 predict visual saliency into yet-unseen frames. Eye
                 tracking and video content are used to model visual
                 attention in a manner that accounts for important
                 eye-gaze characteristics such as temporal
                 discontinuities due to sudden eye movements, noise, and
                 behavioral artifacts. A novel statistical- and
                 algorithm-based method gaze buffering is proposed for
                 eye-gaze analysis and its fusion with content-based
                 features. Our robust saliency prediction is
                 instantiated for two challenging and exciting
                 applications. The first application alters video aspect
                 ratios on-the-fly using content-aware video
                 retargeting, thus making them suitable for a variety of
                 display sizes. The second application dynamically
                 localizes active speakers and places dialog captions
                 on-the-fly in the video stream. Our method ensures that
                 dialogs are faithful to active speaker locations and do
                 not interfere with salient content in the video stream.
                 Our framework naturally accommodates personalisation of
                 the application to suit biases and preferences of
                 individual users.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ghinea:2014:ISI,
  author =       "Gheorghita Ghinea and Christian Timmerer and Weisi Lin
                 and Stephen Gulliver",
  title =        "Introduction to Special Issue on Multiple Sensorial
                 {(MulSeMedia)} Multimodal Media: Advances and
                 Applications",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "9:1--9:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661333",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Lv:2014:MHF,
  author =       "Zhihan Lv and Alaa Halawani and Shengzhong Feng and
                 Haibo Li and Shafiq Ur R{\'e}hman",
  title =        "Multimodal Hand and Foot Gesture Interaction for
                 Handheld Devices",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "10:1--10:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2645860",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We present a hand-and-foot-based multimodal
                 interaction approach for handheld devices. Our method
                 combines input modalities (i.e., hand and foot) and
                 provides a coordinated output to both modalities along
                 with audio and video. Human foot gesture is detected
                 and tracked using contour-based template detection
                 (CTD) and Tracking-Learning-Detection (TLD) algorithm.
                 3D foot pose is estimated from passive homography
                 matrix of the camera. 3D stereoscopic and vibrotactile
                 are used to enhance the immersive feeling. We developed
                 a multimodal football game based on the multimodal
                 approach as a proof-of-concept. We confirm our system's
                 user satisfaction through a user study.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Prasad:2014:DVC,
  author =       "Manoj Prasad and Murat Russell and Tracy A. Hammond",
  title =        "Designing Vibrotactile Codes to Communicate Verb
                 Phrases",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2637289",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Soldiers, to guard themselves from enemy assault, have
                 to maintain visual and auditory awareness of their
                 environment. Their visual and auditory senses are thus
                 saturated. This makes these channels less usable for
                 communication. The tactile medium of communication with
                 users is appropriate for displaying information in such
                 situations. Research in interpersonal communication
                 among soldiers shows that the most common form of
                 communication between soldiers involves the use of verb
                 phrases. In this article, we have developed a
                 three-by-three tactile display and proposed a method
                 for mapping the components of a verb phrase to two
                 dimensions of tactile codes---shape and waveform.
                 Perception of tactile codes by users depends on the
                 ability of users to distinguish shape and waveform of
                 the code. We have proposed a measure to rate the
                 distinguish-ability of any two shapes and created a
                 graph-based user-centric model using this measure to
                 select distinguishable shapes from a set of all
                 presentable shapes. We conducted two user studies to
                 evaluate the ability of users to perceive tactile
                 information. The results from our first study showed
                 users' ability to perceive tactile shapes, tactile
                 waveforms, and form verb phrases from tactile codes.
                 The recognition accuracy and time taken to distinguish
                 were better when the shapes were selected from the
                 graph model than when shapes were chosen based on
                 intuition. The second user study was conducted to test
                 the performance of users while performing a primary
                 visual task simultaneously with a secondary audio or
                 haptic task. Users were more familiar with perceiving
                 information from an auditory medium than from a haptic
                 medium, which was reflected in their performance. Thus
                 the performance of users in the primary visual task was
                 better while using an audio medium of communication
                 than while using a haptic medium of communication.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Murray:2014:MSE,
  author =       "Niall Murray and Brian Lee and Yuansong Qiao and
                 Gabriel-Miro Muntean",
  title =        "Multiple-Scent Enhanced Multimedia Synchronization",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2637293",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This study looked at users' perception of interstream
                 synchronization between audiovisual media and two
                 olfactory streams. The ability to detect skews and the
                 perception and impact of skews on user Quality of
                 Experience (QoE) is analyzed. The olfactory streams are
                 presented with the same skews (i.e., delay) and with
                 variable skews (i.e., jitter and mix of scents). This
                 article reports the limits beyond which
                 desynchronization reduces user-perceived quality
                 levels. Also, a minimum gap between the presentations
                 of consecutive scents is identified, necessary to
                 ensuring enhanced user-perceived quality. There is no
                 evidence (not considering scent type) that overlapping
                 or mixing of scents increases user QoE levels for
                 olfaction-enhanced multimedia.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Kroupi:2014:ECP,
  author =       "Eleni Kroupi and Ashkan Yazdani and Jean-Marc Vesin
                 and Touradj Ebrahimi",
  title =        "{EEG} Correlates of Pleasant and Unpleasant Odor
                 Perception",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2637287",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Olfaction-enhanced multimedia experience is becoming
                 vital for strengthening the sensation of reality and
                 the quality of user experience. One approach to
                 investigate olfactory perception is to analyze the
                 alterations in brain activity during stimulation with
                 different odors. In this article, the changes in the
                 electroencephalogram (EEG) when perceiving
                 hedonically-different odors are studied. Results of
                 within and across-subject analysis are presented. We
                 show that EEG-based odor classification using brain
                 activity is possible and can be used to automatically
                 recognize odor pleasantness when a subject-specific
                 classifier is trained. However, it is a challenging
                 problem to design a generic classifier.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Rainer:2014:GUM,
  author =       "Benjamin Rainer and Christian Timmerer",
  title =        "A Generic Utility Model Representing the Quality of
                 Sensory Experience",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2648429",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Current QoE research is mainly focusing on single
                 modalities (audio, visual) or combinations thereof. In
                 our research, we propose annotating traditional
                 multimedia content with additional sensory effects,
                 such as ambient light, vibration, wind, and olfaction,
                 which could potentially stimulate all human senses.
                 Investigating the influence of individual sensory
                 effects and combinations thereof is important in order
                 to understand how these individual sensory effects
                 influence the Quality of Experience (QoE) as a whole.
                 In this article, we describe the results of such a
                 subjective quality assessment of audio-visual sequences
                 which are annotated with additional sensory effects
                 such as ambient light, wind, and vibration using the
                 MPEG-V standard. The results of this assessment allow
                 us to derive a utility model representing the Quality
                 of Sensory Experience (QuaSE) complementary to existing
                 QoE models described in terms of Quality of Service
                 (QoS) parameters. For validating our proposed utility
                 model, we provide an example instantiation and validate
                 it against results of subjective quality assessments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Yuan:2014:UQE,
  author =       "Zhenhui Yuan and Shengyang Chen and Gheorghita Ghinea
                 and Gabriel-Miro Muntean",
  title =        "User Quality of Experience of Mulsemedia
                 Applications",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661329",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "User Quality of Experience (QoE) is of fundamental
                 importance in multimedia applications and has been
                 extensively studied for decades. However, user QoE in
                 the context of the emerging multiple-sensorial media
                 (mulsemedia) services, which involve different media
                 components than the traditional multimedia
                 applications, have not been comprehensively studied.
                 This article presents the results of subjective tests
                 which have investigated user perception of mulsemedia
                 content. In particular, the impact of intensity of
                 certain mulsemedia components including haptic and
                 airflow on user-perceived experience are studied.
                 Results demonstrate that by making use of mulsemedia
                 the overall user enjoyment levels increased by up to
                 77\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Luque:2014:IMS,
  author =       "Francisco Pedro Luque and Iris Galloso and Claudio
                 Feijoo and Carlos Alberto Mart{\'\i}n and Guillermo
                 Cisneros",
  title =        "Integration of Multisensorial Stimuli and Multimodal
                 Interaction in a Hybrid {$3$DTV} System",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2617992",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article proposes the integration of
                 multisensorial stimuli and multimodal interaction
                 components into a sports multimedia asset under two
                 dimensions: immersion and interaction. The first
                 dimension comprises a binaural audio system and a set
                 of sensory effects synchronized with the audiovisual
                 content, whereas the second explores interaction
                 through the insertion of interactive 3D objects into
                 the main screen and on-demand presentation of
                 additional information in a second touchscreen. We
                 present an end-to-end solution integrating these
                 components into a hybrid (internet-broadcast)
                 television system using current 3DTV standards. Results
                 from an experimental study analyzing the perceived
                 quality of these stimuli and their influence on the
                 Quality of Experience are presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Ghinea:2014:MSA,
  author =       "Gheorghita Ghinea and Christian Timmerer and Weisi Lin
                 and Stephen R. Gulliver",
  title =        "Mulsemedia: State of the Art, Perspectives, and
                 Challenges",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2617994",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Mulsemedia---multiple sensorial media---captures a wide
                 variety of research efforts and applications. This
                 article presents a historic perspective on mulsemedia
                 work and reviews current developments in the area.
                 These take place across the traditional multimedia
                 spectrum---from virtual reality applications to computer
                 games---as well as efforts in the arts, gastronomy, and
                 therapy, to mention a few. We also describe
                 standardization efforts, via the MPEG-V standard, and
                 identify future developments and exciting challenges
                 the community needs to overcome.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zha:2014:ISI,
  author =       "Zheng-Jun Zha and Lei Zhang and Max M{\"u}hlh{\"a}user
                 and Alan F. Smeaton",
  title =        "Introduction to the Special Issue Best Papers of {ACM
                 Multimedia 2013}",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "18:1--18:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661331",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Fang:2014:DGI,
  author          = {Quan Fang and Jitao Sang and Changsheng Xu},
  title           = {Discovering Geo-Informative Attributes for Location
                     Recognition and Exploration},
  journal         = j-TOMM,
  volume          = {11},
  number          = {1s},
  pages           = {19:1--19:??},
  month           = sep,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2648581},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Fri Oct 3 12:44:25 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article considers the problem of automatically
                     discovering geo-informative attributes for location
                     recognition and exploration. The attributes are
                     expected to be both discriminative and representative,
                     which correspond to certain distinctive visual patterns
                     and associate with semantic interpretations. For our
                     solution, we analyze the attribute at the region level.
                     Each segmented region in the training set is assigned a
                     binary latent variable indicating its discriminative
                     capability. A latent learning framework is proposed for
                     discriminative region detection and geo-informative
                     attribute discovery. Moreover, we use user-generated
                     content to obtain the semantic interpretation for the
                     discovered visual attributes. Discriminative and
                     search-based attribute annotation methods are developed
                     for geo-informative attribute interpretation. The
                     proposed approach is evaluated on one challenging
                     dataset including GoogleStreetView and Flickr photos.
                     Experimental results show that (1) geo-informative
                     attributes are discriminative and useful for location
                     recognition; (2) the discovered semantic interpretation
                     is meaningful and can be exploited for further location
                     exploration.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Liu:2014:WYB,
  author          = {Luoqi Liu and Junliang Xing and Si Liu and Hui Xu and
                     Xi Zhou and Shuicheng Yan},
  title           = {{``Wow! You Are So Beautiful Today!''}},
  journal         = j-TOMM,
  volume          = {11},
  number          = {1s},
  pages           = {20:1--20:??},
  month           = sep,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2659234},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Fri Oct 3 12:44:25 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Beauty e-Experts, a fully automatic system for
                     makeover recommendation and synthesis, is developed in
                     this work. The makeover recommendation and synthesis
                     system simultaneously considers many kinds of makeover
                     items on hairstyle and makeup. Given a user-provided
                     frontal face image with short/bound hair and no/light
                     makeup, the Beauty e-Experts system not only recommends
                     the most suitable hairdo and makeup, but also
                     synthesizes the virtual hairdo and makeup effects. To
                     acquire enough knowledge for beauty modeling, we built
                     the Beauty e-Experts Database, which contains 1,505
                     female photos with a variety of attributes annotated
                     with different discrete values. We organize these
                     attributes into two different categories, beauty
                     attributes and beauty-related attributes. Beauty
                     attributes refer to those values that are changeable
                     during the makeover process and thus need to be
                     recommended by the system. Beauty-related attributes
                     are those values that cannot be changed during the
                     makeup process but can help the system to perform
                     recommendation. Based on this Beauty e-Experts Dataset,
                     two problems are addressed for the Beauty e-Experts
                     system: what to recommend and how to wear it, which
                     describes a similar process of selecting hairstyle and
                     cosmetics in daily life. For the what-to-recommend
                     problem, we propose a multiple tree-structured
                     supergraph model to explore the complex relationships
                     among high-level beauty attributes, mid-level
                     beauty-related attributes, and low-level image
                     features. Based on this model, the most compatible
                     beauty attributes for a given facial image can be
                     efficiently inferred. For the how-to-wear-it problem,
                     an effective and efficient facial image synthesis
                     module is designed to seamlessly synthesize the
                     recommended makeovers into the user facial image. We
                     have conducted extensive experiments on testing images
                     of various conditions to evaluate and analyze the
                     proposed system. The experimental results well
                     demonstrate the effectiveness and efficiency of the
                     proposed system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Zhang:2014:AAS,
  author =       "Hanwang Zhang and Zheng-Jun Zha and Yang Yang and
                 Shuicheng Yan and Yue Gao and Tat-Seng Chua",
  title =        "Attribute-Augmented Semantic Hierarchy: Towards a
                 Unified Framework for Content-Based Image Retrieval",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "21:1--21:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2637291",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel attribute-augmented
                 semantic hierarchy (A$^2$SH) and demonstrates its
                 effectiveness in bridging both the semantic and
                 intention gaps in content-based image retrieval (CBIR).
                 A$^2$SH organizes semantic concepts into multiple
                 semantic levels and augments each concept with a set of
                 related attributes. The attributes are used to describe
                 the multiple facets of the concept and act as the
                 intermediate bridge connecting the concept and
                 low-level visual content. An hierarchical semantic
                 similarity function is learned to characterize the
                 semantic similarities among images for retrieval. To
                 better capture user search intent, a hybrid feedback
                 mechanism is developed, which collects hybrid feedback
                 on attributes and images. This feedback is then used to
                 refine the search results based on A$^2$SH. We use
                 A$^2$SH as a basis to develop a unified content-based
                 image retrieval system. We conduct extensive
                 experiments on a large-scale dataset of over one
                 million Web images. Experimental results show that the
                 proposed A$^2$SH can characterize the semantic
                 affinities among images accurately and can shape user
                 search intent quickly, leading to more accurate search
                 results as compared to state-of-the-art CBIR
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Zhao:2014:SSS,
  author =       "Xin Zhao and Xue Li and Chaoyi Pang and Quan Z. Sheng
                 and Sen Wang and Mao Ye",
  title =        "Structured Streaming Skeleton --- A New Feature for
                 Online Human Gesture Recognition",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "22:1--22:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2648583",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Online human gesture recognition has a wide range of
                 applications in computer vision, especially in
                 human-computer interaction applications. The recent
                 introduction of cost-effective depth cameras brings a
                 new trend of research on body-movement gesture
                 recognition. However, there are two major challenges:
                 (i) how to continuously detect gestures from
                 unsegmented streams, and (ii) how to differentiate
                 different styles of the same gesture from other types
                 of gestures. In this article, we solve these two
                 problems with a new effective and efficient feature
                 extraction method---Structured Streaming Skeleton
                 (SSS)---which uses a dynamic matching approach to
                 construct a feature vector for each frame. Our
                 comprehensive experiments on MSRC-12 Kinect Gesture,
                 Huawei/3DLife-2013, and MSR-Action3D datasets have
                 demonstrated superior performances than the
                 state-of-the-art approaches. We also demonstrate model
                 selection based on the proposed SSS feature, where the
                 classifier of squared loss regression with $\ell_{2,1}$
                 norm regularization is a recommended classifier for
                 best performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Carbunar:2014:EFN,
  author =       "Bogdan Carbunar and Rahul Potharaju and Michael Pearce
                 and Venugopal Vasudevan and Michael Needham",
  title =        "Errata for: {A Framework for Network Aware Caching for
                 Video on Demand Systems}",
  journal =      j-TOMM,
  volume =       "11",
  number =       "1s",
  pages =        "23:1--23:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661298",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Oct 3 12:44:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  note =         "See \cite{Carbunar:2013:FNA}.",
  abstract =     "Some errors were introduced into this article in the
                 preparation of the final source files. The errors are
                 summarized in the following text and revised pages with
                 the corrected elements indicated in red are provided.
                 The full corrected article can be accessed in the ACM
                 DL, DOI https://doi.org/10.1145/2501643.2501652 ---
                 Page 8: New Figure 6(a). Page 16: New Figures 8(a),
                 8(b), and 9(a). Page 17: New Figure 10(b). Page 18: New
                 Figures 11 and 12; corrected text reference. Page 19:
                 Final sentence deleted.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Zhang:2014:AGS,
  author          = {Ying Zhang and Luming Zhang and Roger Zimmermann},
  title           = {Aesthetics-Guided Summarization from Multiple User
                     Generated Videos},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {24:1--24:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2659520},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In recent years, with the rapid development of camera
                     technology and portable devices, we have witnessed a
                     flourish of user generated videos, which are gradually
                     reshaping the traditional professional video oriented
                     media market. The volume of user generated videos in
                     repositories is increasing at a rapid rate. In today's
                     video retrieval systems, a simple query will return
                     many videos which seriously increase the viewing
                     burden. To manage these video retrievals and provide
                     viewers with an efficient way to browse, we introduce a
                     system to automatically generate a summarization from
                     multiple user generated videos and present their
                     salience to viewers in an enjoyable manner. Among
                     multiple consumer videos, we find their qualities to be
                     highly diverse due to various factors such as a
                     photographer's experience or environmental conditions
                     at the time of capture. Such quality inspires us to
                     include a video quality evaluation component into the
                     video summarization since videos with poor qualities
                     can seriously degrade the viewing experience. We first
                     propose a probabilistic model to evaluate the aesthetic
                     quality of each user generated video. This model
                     compares the rich aesthetics information from several
                     well-known photo databases with generic unlabeled
                     consumer videos, under a human perception component
                     indicating the correlation between a video and its
                     constituting frames. Subjective studies were carried
                     out with the results indicating that our method is
                     reliable. Then a novel graph-based formulation is
                     proposed for the multi-video summarization task.
                     Desirable summarization criteria is incorporated as the
                     graph attributes and the problem is solved through a
                     dynamic programming framework. Comparisons with several
                     state-of-the-art methods demonstrate that our algorithm
                     performs better than other methods in generating a
                     skimming video in preserving the essential scenes from
                     the original multiple input videos, with smooth
                     transitions among consecutive segments and appealing
                     aesthetics overall.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {24},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Calagari:2014:AAL,
  author          = {Kiana Calagari and Mohammad Reza Pakravan and Shervin
                     Shirmohammadi and Mohamed Hefeeda},
  title           = {{ALP}: Adaptive Loss Protection Scheme with Constant
                     Overhead for Interactive Video Applications},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {25:1--25:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2656203},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {There has been an increasing demand for interactive
                     video transmission over the Internet for applications
                     such as video conferencing, video calls, and
                     telepresence applications. These applications are
                     increasingly moving towards providing High Definition
                     (HD) video quality to users. A key challenge in these
                     applications is to preserve the quality of video when
                     it is transported over best-effort networks that do not
                     guarantee lossless transport of video packets. In such
                     conditions, it is important to protect the transmitted
                     video by using intelligent and adaptive protection
                     schemes. Applications such as HD video conferencing
                     require live interaction among participants, which
                     limits the overall delay the system can tolerate.
                     Therefore, the protection scheme should add little or
                     no extra delay to video transport. We propose a novel
                     Adaptive Loss Protection (ALP) scheme for interactive
                     HD video applications such as video conferencing and
                     video chats. This scheme adds negligible delay to the
                     transmission process and is shown to achieve better
                     quality than other schemes in lossy networks. The
                     proposed ALP scheme adaptively applies four different
                     protection modes to cope with the dynamic network
                     conditions, which results in high video quality in all
                     network conditions. Our ALP scheme consists of four
                     protection modes; each of these modes utilizes a
                     protection method. Two of the modes rely on the
                     state-of-the-art protection methods, and we propose a
                     new Integrated Loss Protection (ILP) method for the
                     other two modes. In the ILP method we integrate three
                     factors for distributing the protection among packets.
                     These three factors are error propagation, region of
                     interest and header information. In order to decide
                     when to switch between the protection modes, a new
                     metric is proposed based on the effectiveness of each
                     mode in performing protection, rather than just
                     considering network statistics such as packet loss
                     rate. Results show that by using this metric not only
                     the overall quality will be improved but also the
                     variance of quality will decrease. One of the main
                     advantages of the proposed ALP scheme is that it does
                     not increase the bit rate overhead in poor network
                     conditions. Our results show a significant gain in
                     video quality, up to 3dB PSNR improvement is achieved
                     using our scheme, compared to protecting all packets
                     equally with the same amount of overhead.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Ren:2014:BGO,
  author          = {Dongni Ren and Yisheng Xu and S.-H. Gary Chan},
  title           = {Beyond {1Mbps} Global Overlay Live Streaming: The Case
                     of Proxy Helpers},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {26:1--26:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2652485},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In order to provide live streaming over the global
                     Internet, a content provider often deploys an overlay
                     network consisting of distributed proxies placed close
                     to user pools. Streaming of multi-Mbps video over such
                     an overlay is challenging because of bandwidth
                     bottlenecks in paths. To effectively overcome these
                     bottlenecks, we consider employing proxy helpers in the
                     overlay to provide rich path diversity. The helpers do
                     not have any attached users, and hence may forward
                     partial video streams (or not at all) if necessary. In
                     this way, the helpers serve as stepping stones to
                     supply full streams to the servers. The issue is how to
                     involve the helpers in the overlay to achieve low
                     streaming delay meeting a certain high streaming
                     bitrate requirement. To address the issue, we first
                     formulate the problem which captures various delay and
                     bandwidth components, and show that it is NP-hard. We
                     then propose an efficient algorithm called
                     Stepping-Stones (SS) which can be efficiently
                     implemented in a controller. Given the encouraging
                     simulation results, we develop a novel streaming
                     testbed for SS and explore, through sets of Internet
                     experiments, the effectiveness of helpers to achieve
                     high bitrate (multi-Mbps) global live streaming. In our
                     experiments, proxies are deployed with a reasonably
                     wide global footprint. We collect more than a hundred
                     hours of streaming traces with bitrate ranging from
                     500kbps to a few Mbps. Our experimental data validates
                     that helpers indeed play an important role in achieving
                     high bitrate in today's Internet. Global multi-Mbps
                     streaming is possible due to their multihop and
                     multipath advantages. Our experimental trials and data
                     also provide valuable insights on the design of a
                     global push-based streaming network. There are strong
                     benefits of using proxy helpers to achieve high bitrate
                     and low delay.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Qian:2014:SEC,
  author          = {Shengsheng Qian and Tianzhu Zhang and Changsheng Xu
                     and M. Shamim Hossain},
  title           = {Social Event Classification via Boosted Multimodal
                     Supervised Latent {Dirichlet} Allocation},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {27:1--27:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2659521},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {With the rapidly increasing popularity of social media
                     sites (e.g., Flickr, YouTube, and Facebook), it is
                     convenient for users to share their own comments on
                     many social events, which successfully facilitates
                     social event generation, sharing and propagation and
                     results in a large amount of user-contributed media
                     data (e.g., images, videos, and text) for a wide
                     variety of real-world events of different types and
                     scales. As a consequence, it has become more and more
                     difficult to exactly find the interesting events from
                     massive social media data, which is useful to browse,
                     search and monitor social events by users or
                     governments. To deal with these issues, we propose a
                     novel boosted multimodal supervised Latent Dirichlet
                     Allocation (BMM-SLDA) for social event classification
                     by integrating a supervised topic model, denoted as
                     multi-modal supervised Latent Dirichlet Allocation
                     (mm-SLDA), in the boosting framework. Our proposed
                     BMM-SLDA has a number of advantages. (1) Our mm-SLDA
                     can effectively exploit the multimodality and the
                     multiclass property of social events jointly, and make
                     use of the supervised category label information to
                     classify multiclass social event directly. (2) It is
                     suitable for large-scale data analysis by utilizing
                     boosting weighted sampling strategy to iteratively
                     select a small subset of data to efficiently train the
                     corresponding topic models. (3) It effectively exploits
                     social event structure by the document weight
                     distribution with classification error and can
                     iteratively learn new topic model to correct the
                     previously misclassified event documents. We evaluate
                     our BMM-SLDA on a real world dataset and show extensive
                     experimental results, which demonstrate that our model
                     outperforms state-of-the-art methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Ye:2014:OBL,
  author          = {Jun Ye and Kien A. Hua},
  title           = {Octree-Based {$3$D} Logic and Computation of Spatial
                     Relationships in Live Video Query Processing},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {28:1--28:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2645864},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Live video computing (LVC) on distributed smart
                     cameras has many important applications; and a database
                     approach based on a Live Video DataBase Management
                     System (LVDBMS) has shown to be effective for general
                     LVC application development. The performance of such a
                     database system relies on accurate interpretation of
                     spatial relationships among objects in the live video.
                     With the popularity of affordable depth cameras, 3D
                     spatial computation techniques have been applied.
                     However, the 3D object models currently used are
                     expensive to compute, and offer limited scalability. We
                     address this drawback in this article by proposing an
                     octree-based 3D spatial logic and presenting algorithms
                     for computing 3D spatial relationships using depth
                     cameras. To support continuous query processing on live
                     video streams, we also develop a GPU-based
                     implementation of the proposed technique to further
                     enhance scalability for real-time applications.
                     Extensive performance studies based on a public RGB-D
                     dataset as well as the LVDBMS prototype demonstrates
                     the correctness and efficiency of our techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Yin:2014:STT,
  author          = {Yifang Yin and Zhijie Shen and Luming Zhang and Roger
                     Zimmermann},
  title           = {Spatial-Temporal Tag Mining for Automatic Geospatial
                     Video Annotation},
  journal         = j-TOMM,
  volume          = {11},
  number          = {2},
  pages           = {29:1--29:??},
  month           = dec,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2658981},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l          = {1551-6857},
  bibdate         = {Wed Jan 7 17:48:10 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Videos are increasingly geotagged and used in
                     practical and powerful GIS applications. However, video
                     search and management operations are typically
                     supported by manual textual annotations, which are
                     subjective and laborious. Therefore, research has been
                     conducted to automate or semi-automate this process.
                     Since a diverse vocabulary for video annotations is of
                     paramount importance towards good search results, this
                     article proposes to leverage crowdsourced data from
                     social multimedia applications that host tags of
                     diverse semantics to build a spatio-temporal tag
                     repository, consequently acting as input to our
                     auto-annotation approach. In particular, to build the
                     tag store, we retrieve the necessary data from several
                     social multimedia applications, mine both the spatial
                     and temporal features of the tags, and then refine and
                     index them accordingly. To better integrate the tag
                     repository, we extend our previous approach by
                     leveraging the temporal characteristics of videos as
                     well. Moreover, we set up additional ranking criteria
                     on the basis of tag similarity, popularity and location
                     bias. Experimental results demonstrate that, by making
                     use of such a tag repository, the generated tags have a
                     wide range of semantics, and the resulting rankings are
                     more consistent with human perception.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@Article{Lin:2014:LAM,
  author =       "Chih-Wei Lin and Kuan-Wen Chen and Shen-Chi Chen and
                 Cheng-Wu Chen and Yi-Ping Hung",
  title =        "Large-Area, Multilayered, and High-Resolution Visual
                 Monitoring Using a Dual-Camera System",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2",
  pages =        "30:1--30:??",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2645862",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Jan 7 17:48:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Large-area, high-resolution visual monitoring systems
                 are indispensable in surveillance applications. To
                 construct such systems, high-quality image capture and
                 display devices are required. Whereas high-quality
                 displays have rapidly developed, as exemplified by the
                 announcement of the 85-inch 4K ultrahigh-definition TV
                 by Samsung at the 2013 Consumer Electronics Show (CES),
                 high-resolution surveillance cameras have progressed
                 slowly and remain not widely used compared with
                 displays. In this study, we designed an innovative
                 framework, using a dual-camera system comprising a
                 wide-angle fixed camera and a high-resolution
                 pan-tilt-zoom (PTZ) camera to construct a large-area,
                 multilayered, and high-resolution visual monitoring
                 system that features multiresolution monitoring of
                 moving objects. First, we developed a novel calibration
                 approach to estimate the relationship between the two
                 cameras and calibrate the PTZ camera. The PTZ camera
                 was calibrated based on the consistent property of
                 distinct pan-tilt angle at various zooming factors,
                 accelerating the calibration process without affecting
                 accuracy; this calibration process has not been
                 reported previously. After calibrating the dual-camera
                 system, we used the PTZ camera and synthesized a
                 large-area and high-resolution background image. When
                 foreground targets were detected in the images captured
                 by the wide-angle camera, the PTZ camera was controlled
                 to continuously track the user-selected target. Last,
                 we integrated preconstructed high-resolution background
                 and low-resolution foreground images captured using the
                 wide-angle camera and the high-resolution foreground
                 image captured using the PTZ camera to generate a
                 large-area, multilayered, and high-resolution view of
                 the scene.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Deng:2014:TFP,
  author =       "Zhengyu Deng and Ming Yan and Jitao Sang and
                 Changsheng Xu",
  title =        "{Twitter} is Faster: Personalized Time-Aware Video
                 Recommendation from {Twitter} to {YouTube}",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2",
  pages =        "31:1--31:??",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2637285",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Jan 7 17:48:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Traditional personalized video recommendation methods
                 focus on utilizing user profile or user history
                 behaviors to model user interests, which follows a
                 static strategy and fails to capture the swift shift of
                 the short-term interests of users. According to our
                 cross-platform data analysis, the information emergence
                 and propagation is faster in social textual
                 stream-based platforms than that in multimedia sharing
                 platforms at micro user level. Inspired by this, we
                 propose a dynamic user modeling strategy to tackle
                 personalized video recommendation issues in the
                 multimedia sharing platform YouTube, by transferring
                 knowledge from the social textual stream-based platform
                 Twitter. In particular, the cross-platform video
                 recommendation strategy is divided into two steps. (1)
                 Real-time hot topic detection: the hot topics that
                 users are currently following are extracted from users'
                 tweets, which are utilized to obtain the related videos
                 in YouTube. (2) Time-aware video recommendation: for
                 the target user in YouTube, the obtained videos are
                 ranked by considering the user profile in YouTube, time
                 factor, and quality factor to generate the final
                 recommendation list. In this way, the short-term (hot
                 topics) and long-term (user profile) interests of users
                 are jointly considered. Carefully designed experiments
                 have demonstrated the advantages of the proposed
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Hu:2014:SFV,
  author =       "Yongtao Hu and Jan Kautz and Yizhou Yu and Wenping
                 Wang",
  title =        "Speaker-Following Video Subtitles",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2",
  pages =        "32:1--32:??",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2632111",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Jan 7 17:48:10 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We propose a new method for improving the presentation
                 of subtitles in video (e.g., TV and movies). With
                 conventional subtitles, the viewer has to constantly
                 look away from the main viewing area to read the
                 subtitles at the bottom of the screen, which disrupts
                 the viewing experience and causes unnecessary
                 eyestrain. Our method places on-screen subtitles next
                 to the respective speakers to allow the viewer to
                 follow the visual content while simultaneously reading
                 the subtitles. We use novel identification algorithms
                 to detect the speakers based on audio and visual
                 information. Then the placement of the subtitles is
                 determined using global optimization. A comprehensive
                 usability study indicated that our subtitle placement
                 method outperformed both conventional fixed-position
                 subtitling and another previous dynamic subtitling
                 method in terms of enhancing the overall viewing
                 experience and reducing eyestrain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chen:2015:ISI,
  author =       "Kuan-Ta Chen and Songqing Chen and Wei Tsang Ooi",
  title =        "Introduction to the Special Issue on {MMSys 2014} and
                 {NOSSDAV 2014}",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "41:1--41:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2717509",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Schaber:2015:CAM,
  author =       "Philipp Schaber and Stephan Kopf and Sina Wetzel and
                 Tyler Ballast and Christoph Wesch and Wolfgang
                 Effelsberg",
  title =        "{CamMark}: Analyzing, Modeling, and Simulating
                 Artifacts in Camcorder Copies",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "42:1--42:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700295",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "To support the development of any system that includes
                 the generation and evaluation of camcorder copies, as
                 well as to provide a common benchmark for robustness
                 against camcorder copies, we present a tool to simulate
                 digital video re-acquisition using a digital video
                 camera. By resampling each video frame, we simulate the
                 typical artifacts occurring in a camcorder copy:
                 geometric modifications (aspect ratio changes,
                 cropping, perspective and lens distortion), temporal
                 sampling artifacts (due to different frame rates,
                 shutter speeds, rolling shutters, or playback), spatial
                 and color subsampling (rescaling, filtering, Bayer
                 color filter array), and processing steps (automatic
                 gain control, automatic white balance). We also support
                 the simulation of camera movement (e.g., a hand-held
                 camera) and background insertion. Furthermore, we allow
                 for an easy setup and calibration of all the simulated
                 artifacts, using sample/reference pairs of images and
                 videos. Specifically temporal subsampling effects are
                 analyzed in detail to create realistic frame blending
                 artifacts in the simulated copies. We carefully
                 evaluated our entire camcorder simulation system and
                 found that the models we developed describe and match
                 the real artifacts quite well.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Toni:2015:OSA,
  author =       "Laura Toni and Ramon Aparicio-Pardo and Karine Pires
                 and Gwendal Simon and Alberto Blanc and Pascal
                 Frossard",
  title =        "Optimal Selection of Adaptive Streaming
                 Representations",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "43:1--43:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700294",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Adaptive streaming addresses the increasing and
                 heterogeneous demand of multimedia content over the
                 Internet by offering several encoded versions for each
                 video sequence. Each version (or representation) is
                 characterized by a resolution and a bit rate, and it is
                 aimed at a specific set of users, like TV or mobile
                 phone clients. While most existing works on adaptive
                 streaming deal with effective playout-buffer control
                 strategies on the client side, in this article we take
                 a providers' perspective and propose solutions to
                 improve user satisfaction by optimizing the set of
                 available representations. We formulate an integer
                 linear program that maximizes users' average
                 satisfaction, taking into account network dynamics,
                 type of video content, and user population
                 characteristics. The solution of the optimization is a
                 set of encoding parameters corresponding to the
                 representations set that maximizes user satisfaction.
                 We evaluate this solution by simulating multiple
                 adaptive streaming sessions characterized by realistic
                 network statistics, showing that the proposed solution
                 outperforms commonly used vendor recommendations, in
                 terms of user satisfaction but also in terms of
                 fairness and outage probability. The simulation results
                 show that video content information as well as network
                 constraints and users' statistics play a crucial role
                 in selecting proper encoding parameters to provide
                 fairness among users and to reduce network resource
                 usage. We finally propose a few theoretical guidelines
                 that can be used, in realistic settings, to choose the
                 encoding parameters based on the user characteristics,
                 the network capacity and the type of video content.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chen:2015:ADF,
  author =       "Liang Chen and Yipeng Zhou and Dah Ming Chiu",
  title =        "Analysis and Detection of Fake Views in Online Video
                 Services",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "44:1--44:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700290",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Online video-on-demand (VoD) services invariably
                 maintain a view count for each video they serve, and it
                 has become an important currency for various
                 stakeholders, from viewers, to content owners,
                 advertizers, and the online service providers
                 themselves. There is often significant financial
                 incentive to use a robot (or a botnet) to artificially
                 create fake views. How can we detect fake views? Can we
                 detect them (and stop them) efficiently? What is the
                 extent of fake views with current VoD service
                 providers? These are the questions we study in this
                 article. We develop some algorithms and show that they
                 are quite effective for this problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Song:2015:SVT,
  author =       "Minseok Song and Yeongju Lee and Jinhan Park",
  title =        "Scheduling a Video Transcoding Server to Save Energy",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "45:1--45:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700282",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Recent popular streaming services such as TV
                 Everywhere, N-Screen, and dynamic adaptive streaming
                 over HTTP (DASH) need to deliver content to the wide
                 range of devices, requiring video content to be
                 transcoded into different versions. Transcoding tasks
                 require a lot of computation, and each task typically
                 has its own real-time constraint. These make it
                 difficult to manage transcoding, but the more efficient
                 use of energy in servers is an imperative. We
                 characterize transcoding workloads in terms of
                 deadlines and computation times, and propose a new
                 dynamic voltage and frequency scaling (DVFS) scheme
                 that allocates a frequency and a workload to each CPU
                 with the aim of minimizing power consumption while
                 meeting all transcoding deadlines. This scheme has been
                 simulated, and also implemented in a Linux transcoding
                 server, in which a frontend node distributes
                 transcoding requests to heterogeneous backend nodes.
                 This required a new protocol for communication between
                 nodes, a DVFS management scheme to reduce power
                 consumption and thread management and scheduling
                 schemes which ensure that transcoding deadlines are
                 met. Power measurements show that this approach can
                 reduce system-wide energy consumption by 17\% to 31\%,
                 compared with the Linux Ondemand governor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Langroodi:2015:DCA,
  author =       "Mohsen Jamali Langroodi and Joseph Peters and Shervin
                 Shirmohammadi",
  title =        "Decoder-Complexity-Aware Encoding of Motion
                 Compensation for Multiple Heterogeneous Receivers",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "46:1--46:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700300",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "For mobile multimedia systems, advances in battery
                 technology have been much slower than those in memory,
                 graphics, and processing power, making power
                 consumption a major concern in mobile systems. The
                 computational complexity of video codecs, which
                 consists of CPU operations and memory accesses, is one
                 of the main factors affecting power consumption. In
                 this article, we propose a method that achieves
                 near-optimal video quality while respecting
                 user-defined bounds on the complexity needed to decode
                 a video. We specifically focus on the motion
                 compensation process, including motion vector
                 prediction and interpolation, because it is the single
                 largest component of computation-based power
                 consumption. We start by formulating a scenario with a
                 single receiver as a rate-distortion optimization
                 problem and we develop an efficient
                 decoder-complexity-aware video encoding method to solve
                 it. Then we extend our approach to handle multiple
                 heterogeneous receivers, each with a different
                 complexity requirement. We test our method
                 experimentally using the H.264 standard for the single
                 receiver scenario and the H.264 SVC extension for the
                 multiple receiver scenario. Our experimental results
                 show that our method can achieve up to 97\% of the
                 optimal solution value in the single receiver scenario,
                 and an average of 97\% of the optimal solution value in
                 the multiple receiver scenario. Furthermore, our tests
                 with actual power measurements show a power saving of
                 up to 23\% at the decoder when the complexity threshold
                 is halved in the encoder.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chen:2015:TAT,
  author =       "Shannon Chen and Zhenhuan Gao and Klara Nahrstedt and
                 Indranil Gupta",
  title =        "{$3$DTI} Amphitheater: Towards {$3$DTI} Broadcasting",
  journal =      j-TOMM,
  volume =       "11",
  number =       "2s",
  pages =        "47:1--47:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700297",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Wed Feb 25 17:56:15 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "3DTI Amphitheater is a live broadcasting system for
                 dissemination of 3DTI (3D Tele-immersive) content. The
                 virtual environment constructed by the system mimics an
                 amphitheater in the real world, where performers
                 interact with each other in the central circular stage,
                 and the audience is placed in virtual seats that
                 surround the stage. Users of the Amphitheater can be
                 geographically dispersed and the streams created by the
                 performer sites are disseminated in a P2P network among
                 the participants. To deal with the high bandwidth
                 demand and strict latency bound of the service, we
                 identify the hierarchical priority of streams in
                 construction of the content dissemination forest.
                 Result shows that the Amphitheater outperforms prior
                 3DTI systems by boosting the application QoS by a
                 factor of 2.8 while sustaining the same hundred-scale
                 audience group.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Chen:2015:PMV,
  author =       "Ke Chen and Zhong Zhou and Wei Wu",
  title =        "Progressive Motion Vector Clustering for Motion
                 Estimation and Auxiliary Tracking",
  journal =      j-TOMM,
  volume =       "11",
  number =       "3",
  pages =        "33:1--33:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700296",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Feb 5 17:03:39 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The motion vector similarity between neighboring
                 blocks is widely used in motion estimation algorithms.
                 However, for nonneighboring blocks, they may also have
                 similar motions due to close depths or belonging to the
                 same object inside the scene. Therefore, the motion
                 vectors usually have several kinds of patterns, which
                 reveal a clustering structure. In this article, we
                 propose a progressive clustering algorithm, which
                 periodically counts the motion vectors of the past
                 blocks to make incremental clustering statistics. These
                 statistics are used as the motion vector predictors for
                 the following blocks. It is proved to be much more
                 efficient for one block to find the best-matching
                 candidate with the predictors. We also design the
                 clustering based search with CUDA for GPU acceleration.
                 Another interesting application of the clustering
                 statistics is persistent static object tracking. Based
                 on the statistics, several auxiliary tracking areas are
                 created to guide the object tracking. Even when the
                 target object has significant changes in appearance or
                 it disappears occasionally, its position still can be
                 predicted. The experiments on Xiph.org Video Test Media
                 dataset illustrate that our clustering based search
                 algorithm outperforms the mainstream and some
                 state-of-the-art motion estimation algorithms. It is 33
                 times faster on average than the full search algorithm
                 with only slightly higher mean-square error values in
                 the experiments. The tracking results show that the
                 auxiliary tracking areas help to locate the target
                 object effectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Shen:2015:HFM,
  author =       "Liquan Shen and Ping An and Zhaoyang Zhang and
                 Qianqian Hu and Zhengchuan Chen",
  title =        "A {$3$D-HEVC} Fast Mode Decision Algorithm for
                 Real-Time Applications",
  journal =      j-TOMM,
  volume =       "11",
  number =       "3",
  pages =        "34:1--34:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700298",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Feb 5 17:03:39 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "3D High Efficiency Video Coding (3D-HEVC) is an
                 extension of the HEVC standard for coding of multiview
                 videos and depth maps. It inherits the same quadtree
                 coding structure as HEVC for both components, which
                 allows recursively splitting into four equal-sized
                 coding units (CU). One of 11 different prediction modes
                 is chosen to code a CU in inter-frames. Similar to the
                 joint model of H.264/AVC, the mode decision process in
                 HM (reference software of HEVC) is performed using all
                 the possible depth levels and prediction modes to find
                 the one with the least rate distortion cost using a
                 Lagrange multiplier. Furthermore, both motion
                 estimation and disparity estimation need to be
                 performed in the encoding process of 3D-HEVC. Those
                 tools achieve high coding efficiency, but lead to a
                 significant computational complexity. In this article,
                 we propose a fast mode decision algorithm for 3D-HEVC.
                 Since multiview videos and their associated depth maps
                 represent the same scene, at the same time instant,
                 their prediction modes are closely linked. Furthermore,
                 the prediction information of a CU at the depth level
                 $X$ is strongly related to that of its parent CU at the
                 depth level $X-1$ in the quadtree coding structure of
                 HEVC since two corresponding CUs from two neighboring
                 depth levels share similar video characteristics. The
                 proposed algorithm jointly exploits the inter-view
                 coding mode correlation, the inter-component
                 (texture-depth) correlation and the inter-level
                 correlation in the quadtree structure of 3D-HEVC.
                 Experimental results show that our algorithm saves 66\%
                 encoder runtime on average with only a 0.2\% BD-Rate
                 increase on coded views and 1.3\% BD-Rate increase on
                 synthesized views.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@article{Yang:2015:BML,
  author = {Xiaoshan Yang and Tianzhu Zhang and Changsheng Xu and
      Ming-Hsuan Yang},
  title = {Boosted Multifeature Learning for Cross-Domain
      Transfer},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {35:1--35:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700286},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Conventional learning algorithm assumes that the
      training data and test data share a common
      distribution. However, this assumption will greatly
      hinder the practical application of the learned model
      for cross-domain data analysis in multimedia. To deal
      with this issue, transfer learning based technology
      should be adopted. As a typical version of transfer
      learning, domain adaption has been extensively studied
      recently due to its theoretical value and practical
      interest. In this article, we propose a boosted
      multifeature learning (BMFL) approach to iteratively
      learn multiple representations within a boosting
      procedure for unsupervised domain adaption. The
      proposed BMFL method has a number of properties. (1) It
      reuses all instances with different weights assigned by
      the previous boosting iteration and avoids discarding
      labeled instances as in conventional methods. (2) It
      models the instance weight distribution effectively by
      considering the classification error and the domain
      similarity, which facilitates learning new feature
      representation to correct the previously misclassified
      instances. (3) It learns multiple different feature
      representations to effectively bridge the source and
      target domains. We evaluate the BMFL by comparing its
      performance on three applications: image
      classification, sentiment classification and spam
      filtering. Extensive experimental results demonstrate
      that the proposed BMFL algorithm performs favorably
      against state-of-the-art domain adaption methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {35},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Lin:2015:DVS,
  author = {Pei-Yu Lin},
  title = {Double Verification Secret Sharing Mechanism Based on
      Adaptive Pixel Pair Matching},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {36:1--36:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700291},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Verifiability is essential for the secret sharing
      approach, which allows the involved participants to
      detect cheaters during the secret retrieval process. In
      this article, we propose a double verification secret
      sharing (DVSS) mechanism that can not only prevent
      fraudulent participants but also satisfy the
      requirements of secret payload, camouflage, image
      fidelity and lossless revealed secret. DVSS offers
      double verification process to enhance the cheater
      detectability; experimental results reveal that the
      designed scheme can share larger secret capacity and
      retain superior image quality than the related secret
      sharing methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {36},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Wang:2015:INB,
  author = {Shuang Wang and Shuqiang Jiang},
  title = {{INSTRE}: a New Benchmark for Instance-Level Object
      Retrieval and Recognition},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {37:1--37:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700292},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Over the last several decades, researches on visual
      object retrieval and recognition have achieved fast and
      remarkable success. However, while the category-level
      tasks prevail in the community, the instance-level
      tasks (especially recognition) have not yet received
      adequate focuses. Applications such as content-based
      search engine and robot vision systems have alerted the
      awareness to bring instance-level tasks into a more
      realistic and challenging scenario. Motivated by the
      limited scope of existing instance-level datasets, in
      this article we propose a new benchmark for
      INSTance-level visual object REtrieval and REcognition
      (INSTRE). Compared with existing datasets, INSTRE has
      the following major properties: (1) balanced data
      scale, (2) more diverse intraclass instance variations,
      (3) cluttered and less contextual backgrounds, (4)
      object localization annotation for each image, (5)
      well-manipulated double-labelled images for measuring
      multiple object (within one image) case. We will
      quantify and visualize the merits of INSTRE data, and
      extensively compare them against existing datasets.
      Then on INSTRE, we comprehensively evaluate several
      popular algorithms to large-scale object retrieval
      problem with multiple evaluation metrics. Experimental
      results show that all the methods suffer a performance
      drop on INSTRE, proving that this field still remains a
      challenging problem. Finally we integrate these
      algorithms into a simple yet efficient scheme for
      recognition and compare it with classification-based
      methods. Importantly, we introduce the realistic
      multiobjects recognition problem. All experiments are
      conducted in both single object case and multiple
      objects case.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {37},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Lathey:2015:IEE,
  author = {Ankita Lathey and Pradeep K. Atrey},
  title = {Image Enhancement in Encrypted Domain over Cloud},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {38:1--38:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2656205},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Cloud-based multimedia systems are becoming
      increasingly common. These systems offer not only
      storage facility, but also high-end computing
      infrastructure which can be used to process data for
      various analysis tasks ranging from low-level data
      quality enhancement to high-level activity and behavior
      identification operations. However, cloud data centers,
      being third party servers, are often prone to
      information leakage, raising security and privacy
      concerns. In this article, we present a Shamir's secret
      sharing based method to enhance the quality of
      encrypted image data over cloud. Using the proposed
      method we show that several image enhancement
      operations such as noise removal, antialiasing, edge
      and contrast enhancement, and dehazing can be performed
      in encrypted domain with near-zero loss in accuracy and
      minimal computation and data overhead. Moreover, the
      proposed method is proven to be information
      theoretically secure.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {38},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Yin:2015:CVC,
  author = {Yifang Yin and Beomjoo Seo and Roger Zimmermann},
  title = {Content vs. Context: Visual and Geographic Information
      Use in Video Landmark Retrieval},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {39:1--39:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700287},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Due to the ubiquity of sensor-equipped smartphones, it
      has become increasingly feasible for users to capture
      videos together with associated geographic metadata,
      for example the location and the orientation of the
      camera. Such contextual information creates new
      opportunities for the organization and retrieval of
      geo-referenced videos. In this study we explore the
      task of landmark retrieval through the analysis of two
      types of state-of-the-art techniques, namely
      media-content-based and geocontext-based retrievals.
      For the content-based method, we choose the Spatial
      Pyramid Matching (SPM) approach combined with two
      advanced coding methods: Sparse Coding (SC) and
      Locality-Constrained Linear Coding (LLC). For the
      geo-based method, we present the Geo Landmark
      Visibility Determination (GeoLVD) approach which
      computes the visibility of a landmark based on
      intersections of a camera's field-of-view (FOV) and the
      landmark's geometric information available from
      Geographic Information Systems (GIS) and services. We
      first compare the retrieval results of the two methods,
      and discuss the strengths and weaknesses of each
      approach in terms of precision, recall and execution
      time. Next we analyze the factors that affect the
      effectiveness for the content-based and the geo-based
      methods, respectively. Finally we propose a hybrid
      retrieval method based on the integration of the visual
      (content) and geographic (context) information, which
      is shown to achieve significant improvements in our
      experiments. We believe that the results and
      observations in this work will enlighten the design of
      future geo-referenced video retrieval systems, improve
      our understanding of selecting the most appropriate
      visual features for indexing and searching, and help in
      selecting between the most suitable methods for
      retrieval based on different conditions.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {39},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Yang:2015:RCI,
  author = {Hong-Ying Yang and Xiang-Yang Wang and Pan-Pan Niu and
      Ai-Long Wang},
  title = {Robust Color Image Watermarking Using Geometric
      Invariant Quaternion Polar Harmonic Transform},
  journal = j-TOMM,
  volume = {11},
  number = {3},
  pages = {40:1--40:??},
  month = jan,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700299},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Thu Feb 5 17:03:39 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {It is a challenging work to design a robust color
      image watermarking scheme against geometric
      distortions. Moments and moment invariants have become
      a powerful tool in robust image watermarking owing to
      their image description capability and geometric
      invariance property. However, the existing moment-based
      watermarking schemes were mainly designed for gray
      images but not for color images, and detection quality
      and robustness will be lowered when watermark is
      directly embedded into the luminance component or three
      color channels of color images. Furthermore, the
      imperceptibility of the embedded watermark is not well
      guaranteed. Based on algebra of quaternions and polar
      harmonic transform (PHT), we introduced the quaternion
      polar harmonic transform (QPHT) for invariant color
      image watermarking in this article, which can be seen
      as the generalization of PHT for gray-level images. It
      is shown that the QPHT can be obtained from the PHT of
      each color channel. We derived and analyzed the
      rotation, scaling, and translation (RST) invariant
      property of QPHT. We also discussed the problem of
      color image watermarking using QPHT. Experimental
      results are provided to illustrate the efficiency of
      the proposed color image watermarking against geometric
      distortions and common image processing operations
      (including color attacks).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {40},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Krishnappa:2015:CCV,
  author = {Dilip Kumar Krishnappa and Michael Zink and Carsten
      Griwodz and P{\aa}l Halvorsen},
  title = {Cache-Centric Video Recommendation: an Approach to
      Improve the Efficiency of {YouTube} Caches},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {48:1--48:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2716310},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {In this article, we take advantage of the user
      behavior of requesting videos from the top of the
      related list provided by YouTube to improve the
      performance of YouTube caches. We recommend that local
      caches reorder the related lists associated with
      YouTube videos, presenting the cached content above
      noncached content. We argue that the likelihood that
      viewers select content from the top of the related list
      is higher than selection from the bottom, and pushing
      contents already in the cache to the top of the related
      list would increase the likelihood of choosing cached
      content. To verify that the position on the list really
      is the selection criterion more dominant than the
      content itself, we conduct a user study with 40
      YouTube-using volunteers who were presented with random
      related lists in their everyday YouTube use. After
      confirming our assumption, we analyze the benefits of
      our approach by an investigation that is based on two
      traces collected from a university campus. Our analysis
      shows that the proposed reordering approach for related
      lists would lead to a 2 to 5 times increase in cache
      hit rate compared to an approach without reordering the
      related list. This increase in hit rate would lead to
      reduction in server load and backend bandwidth usage,
      which in turn reduces the latency in streaming the
      video requested by the viewer and has the potential to
      improve the overall performance of YouTube's content
      distribution system. An analysis of YouTube's
      recommendation system reveals that related lists are
      created from a small pool of videos, which increases
      the potential for caching content from related lists
      and reordering based on the content in the cache.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {48},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Zhang:2015:PMC,
  author = {Yu Zhang and James Z. Wang and Jia Li},
  title = {Parallel Massive Clustering of Discrete
      Distributions},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {49:1--49:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2700293},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {The trend of analyzing big data in artificial
      intelligence demands highly-scalable machine learning
      algorithms, among which clustering is a fundamental and
      arguably the most widely applied method. To extend the
      applications of regular vector-based clustering
      algorithms, the Discrete Distribution (D2) clustering
      algorithm has been developed, aiming at clustering data
      represented by bags of weighted vectors which are well
      adopted data signatures in many emerging information
      retrieval and multimedia learning applications.
      However, the high computational complexity of
      D2-clustering limits its impact in solving massive
      learning problems. Here we present the parallel
      D2-clustering (PD2-clustering) algorithm with
      substantially improved scalability. We developed a
      hierarchical multipass algorithm structure for parallel
      computing in order to achieve a balance between the
      individual-node computation and the integration process
      of the algorithm. Experiments and extensive comparisons
      between PD2-clustering and other clustering algorithms
      are conducted on synthetic datasets. The results show
      that the proposed parallel algorithm achieves
      significant speed-up with minor accuracy loss. We apply
      PD2-clustering to image concept learning. In addition,
      by extending D2-clustering to symbolic data, we apply
      PD2-clustering to protein sequence clustering. For both
      applications, we demonstrate the high competitiveness
      of our new algorithm in comparison with other
      state-of-the-art methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {49},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Baik:2015:EMR,
  author = {Eilwoo Baik and Amit Pande and Prasant Mohapatra},
  title = {Efficient {MAC} for Real-Time Video Streaming over
      Wireless {LAN}},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {50:1--50:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2744412},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Wireless communication systems are highly prone to
      channel errors. With video being a major player in
      Internet traffic and undergoing exponential growth in
      wireless domain, we argue for the need of a Video-aware
      MAC (VMAC) to significantly improve the throughput and
      delay performance of real-time video streaming service.
      VMAC makes two changes to optimize wireless LAN for
      video traffic: (a) It incorporates a
      Perceptual-Error-Tolerance (PET) to the MAC frames by
      reducing MAC retransmissions while minimizing any
      impact on perceptual video quality; and (b) It uses a
      group NACK-based Adaptive Window (NAW) of MAC frames to
      improve both throughput and delay performance in
      varying channel conditions. Through simulations and
      experiments, we observe 56--89\% improvement in
      throughput and 34--48\% improvement in delay
      performance over legacy DCF and 802.11e schemes. VMAC
      also shows 15--78\% improvement over legacy schemes
      with multiple clients.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {50},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Antaris:2015:SSC,
  author = {Stefanos Antaris and Dimitrios Rafailidis},
  title = {Similarity Search over the Cloud Based on Image
      Descriptors' Dimensions Value Cardinalities},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {51:1--51:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2716315},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {In recognition that in modern applications billions of
      images are stored into distributed databases in
      different logical or physical locations, we propose a
      similarity search strategy over the cloud based on the
      dimensions value cardinalities of image descriptors.
      Our strategy has low preprocessing requirements by
      dividing the computational cost of the preprocessing
      steps into several nodes over the cloud and locating
      the descriptors with similar dimensions value
      cardinalities logically close. New images are inserted
      into the distributed databases over the cloud
      efficiently, by supporting dynamical update in
      real-time. The proposed insertion algorithm has low
      computational complexity, depending exclusively on the
      dimensionality of descriptors and a small subset of
      descriptors with similar dimensions value
      cardinalities. Finally, an efficient query processing
      algorithm is proposed, where the dimensions of image
      descriptors are prioritized in the searching strategy,
      assuming that dimensions of high value cardinalities
      have more discriminative power than the dimensions of
      low ones. The computation effort of the query
      processing algorithm is divided into several nodes over
      the cloud infrastructure. In our experiments with seven
      publicly available datasets of image descriptors, we
      show that the proposed similarity search strategy
      outperforms competitive methods of single node,
      parallel and cloud-based architectures, in terms of
      preprocessing cost, search time and accuracy.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {51},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Lin:2015:AMD,
  author = {Yin-Tzu Lin and I-Ting Liu and Jyh-Shing Roger Jang
      and Ja-Ling Wu},
  title = {Audio Musical Dice Game: a User-Preference-Aware
      Medley Generating System},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {52:1--52:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2710015},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {This article proposes a framework for creating
      user-preference-aware music medleys from users' music
      collections. We treat the medley generation process as
      an audio version of a musical dice game. Once the
      user's collection has been analyzed, the system is able
      to generate various pleasing medleys. This flexibility
      allows users to create medleys according to the
      specified conditions, such as the medley structure or
      the must-use clips. Even users without musical
      knowledge can compose medley songs from their favorite
      tracks. The effectiveness of the system has been
      evaluated through both objective and subjective
      experiments on individual components in the system.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {52},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

@article{Chen:2015:AVR,
  author = {Bo-Hao Chen and Shih-Chia Huang},
  title = {An Advanced Visibility Restoration Algorithm for
      Single Hazy Images},
  journal = j-TOMM,
  volume = {11},
  number = {4},
  pages = {53:1--53:??},
  month = apr,
  year = {2015},
  coden = {????},
  doi = {https://doi.org/10.1145/2726947},
  issn = {1551-6857 (print), 1551-6865 (electronic)},
  issn-l = {1551-6857},
  bibdate = {Fri Aug 7 08:29:56 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
      https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract = {Haze removal is the process by which horizontal
      obscuration is eliminated from hazy images captured
      during inclement weather. Images captured in natural
      environments with varied weather conditions frequently
      exhibit localized light sources or color-shift effects.
      The occurrence of these effects presents a difficult
      challenge for hazy image restoration, with which many
      traditional restoration methods cannot adequately
      contend. In this article, we present a new image haze
      removal approach based on Fisher's linear
      discriminant-based dual dark channel prior scheme in
      order to solve the problems associated with the
      presence of localized light sources and color shifts,
      and thereby achieve effective restoration. Experimental
      restoration results via qualitative and quantitative
      evaluations show that our proposed approach can provide
      higher haze-removal efficacy for images captured in
      varied weather conditions than can the other
      state-of-the-art approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {53},
  fjournal = {ACM Transactions on Multimedia Computing,
      Communications, and Applications},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}

%%% TOMM 11(4), article 54, April 2015.  The pages value gives only the
%%% starting page; the ending page is the ?? placeholder, and the article
%%% number is repeated in articleno.
@Article{Bao:2015:CPE,
  author =       "Bing-Kun Bao and Changsheng Xu and Weiqing Min and
                 Mohammod Shamim Hossain",
  title =        "Cross-Platform Emerging Topic Detection and
                 Elaboration from Multimedia Streams",
  journal =      j-TOMM,
  volume =       "11",
  number =       "4",
  pages =        "54:1--54:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2730889",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 7 08:29:56 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With the explosive growth of online media platforms in
                 recent years, it becomes more and more attractive to
                 provide users a solution of emerging topic detection
                 and elaboration. And this posts a real challenge to
                 both industrial and academic researchers because of the
                 overwhelming information available in multiple
                 modalities and with large outlier noises. This article
                 provides a method on emerging topic detection and
                 elaboration using multimedia streams cross different
                 online platforms. Specifically, Twitter, New York Times
                 and Flickr are selected for the work to represent the
                 microblog, news portal and imaging sharing platforms.
                 The emerging keywords of Twitter are firstly extracted
                 using aging theory. Then, to overcome the nature of
                 short length message in microblog, Robust
                 Cross-Platform Multimedia Co-Clustering (RCPMM-CC) is
                 proposed to detect emerging topics with three
                 novelties: (1) The data from different media platforms
                 are in multimodalities; (2) The coclustering is
                 processed based on a pairwise correlated structure, in
                 which the involved three media platforms are pairwise
                 dependent; (3) The noninformative samples are
                 automatically pruned away at the same time of
                 coclustering. In the last step of cross-platform
                 elaboration, we enrich each emerging topic with the
                 samples from New York Times and Flickr by computing the
                 implicit links between social topics and samples from
                 selected news and Flickr image clusters, which are
                 obtained by RCPMM-CC. Qualitative and quantitative
                 evaluation results demonstrate the effectiveness of our
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 11(4), article 55, April 2015.  The braces around QuGu in the
%%% title protect its mixed-case spelling from bibliography styles that
%%% downcase titles.
@Article{Li:2015:QQG,
  author =       "Yang Li and Azzedine Boukerche",
  title =        "{QuGu}: a Quality Guaranteed Video Dissemination
                 Protocol Over Urban Vehicular Ad Hoc Networks",
  journal =      j-TOMM,
  volume =       "11",
  number =       "4",
  pages =        "55:1--55:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2725469",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 7 08:29:56 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video dissemination over Vehicular Ad Hoc Networks is
                 an attractive technology that supports many novel
                 applications. The merit of this work lies in the design
                 of an efficient video dissemination protocol that
                 provides high video quality at different data rates for
                 urban scenarios. Our objective is to improve received
                 video quality while meeting delay and packet loss. In
                 this work, we first employ a reliable scheme known as
                 connected dominating set, which is an efficient
                 receiver-based routing scheme for broadcasting video
                 content. To avoid repeated computing of the connected
                 dominating set, we add three statuses to each node. In
                 nonscalable video coding, the distribution of lost
                 frames can cause a major impact on video quality at the
                 receiver's end. Therefore, for the second step, we
                 employ Interleaving to spread out the burst losses and
                 to reduce the influence of loss distributions. Although
                 Interleaving can reduce the influence of cluster frame
                 loss, single packet loss is also a concern due to
                 collisions, and to intermittent disconnection in the
                 topology. In order to fix these single packet losses,
                 we propose a store-carry-forward scheme for the nodes
                 in order to retransmit the local buffer stored packets.
                 The results, when compared to the selected base
                 protocols, show that our proposed protocol is an
                 efficient solution for video dissemination over urban
                 Vehicular Ad Hoc Networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 11(4), article 56, April 2015.  The last author's given name is
%%% written with the braced accent control sequence for a-ring, the form
%%% that classic BibTeX treats as a single letter.
@Article{Gaddam:2015:COM,
  author =       "Vamsidhar Reddy Gaddam and Ragnhild Eg and Ragnar
                 Langseth and Carsten Griwodz and P{\aa}l Halvorsen",
  title =        "The Cameraman Operating My Virtual Camera is
                 Artificial: Can the Machine Be as Good as a Human?",
  journal =      j-TOMM,
  volume =       "11",
  number =       "4",
  pages =        "56:1--56:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2744411",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 7 08:29:56 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this article, we argue that the energy spent in
                 designing autonomous camera control systems is not
                 spent in vain. We present a real-time virtual camera
                 system that can create smooth camera motion. Similar
                 systems are frequently benchmarked with the human
                 operator as the best possible reference; however, we
                 avoid a priori assumptions in our evaluations. Our main
                 question is simply whether we can design algorithms to
                 steer a virtual camera that can compete with the user
                 experience for recordings from an expert operator with
                 several years of experience? In this respect, we
                 present two low-complexity servoing methods that are
                 explored in two user studies. The results from the user
                 studies give a promising answer to the question
                 pursued. Furthermore, all components of the system meet
                 the real-time requirements on commodity hardware. The
                 growing capabilities of both hardware and network in
                 mobile devices give us hope that this system can be
                 deployed to mobile users in the near future. Moreover,
                 the design of the presented system takes into account
                 that services to concurrent users must be supported.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 11(4), article 57, April 2015.  The abstract writes the acronym
%%% for multi-camera coordination and control with a TeX math-mode
%%% superscript (MC$^3$), so the field must be typeset by TeX to render.
@Article{Natarajan:2015:MCC,
  author =       "Prabhu Natarajan and Pradeep K. Atrey and Mohan
                 Kankanhalli",
  title =        "Multi-Camera Coordination and Control in Surveillance
                 Systems: a Survey",
  journal =      j-TOMM,
  volume =       "11",
  number =       "4",
  pages =        "57:1--57:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2710128",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 7 08:29:56 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The use of multiple heterogeneous cameras is becoming
                 more common in today's surveillance systems. In order
                 to perform surveillance tasks, effective coordination
                 and control in multi-camera systems is very important,
                 and is catching significant research attention these
                 days. This survey aims to provide researchers with a
                 state-of-the-art overview of various techniques for
                 multi-camera coordination and control (MC$^3$) that
                 have been adopted in surveillance systems. The existing
                 literature on MC$^3$ is presented through several
                 classifications based on the applicable architectures,
                 frameworks and the associated surveillance tasks.
                 Finally, a discussion on the open problems in
                 surveillance area that can be solved effectively using
                 MC$^3$ and the future directions in MC$^3$ research is
                 presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 12(1), article 1, August 2015.  The pages value gives only the
%%% starting page; the ending page is the ?? placeholder, and the article
%%% number is repeated in articleno.
@Article{You:2015:UPD,
  author =       "Shingchern D. You and Yi-Han Pu",
  title =        "Using Paired Distances of Signal Peaks in Stereo
                 Channels as Fingerprints for Copy Identification",
  journal =      j-TOMM,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2742059",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 28 06:14:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article proposes to use the relative distances
                 between adjacent envelope peaks detected in stereo
                 audio as fingerprints for copy identification. The
                 matching algorithm used is the rough longest common
                 subsequence (RLCS) algorithm. The experimental results
                 show that the proposed approach has better
                 identification accuracy than an MPEG-7 based scheme for
                 distorted and noisy audio. When compared with other
                 schemes, the proposed scheme uses fewer bits with
                 comparable performance. The proposed fingerprints can
                 also be used in conjunction with the MPEG-7 based
                 scheme for lower computational burden.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

%%% TOMM 12(1), article 2, August 2015.  The first author's two-word
%%% surname is braced so that BibTeX name parsing keeps it as one unit;
%%% the braces around QoE in the title protect its capitalization from
%%% styles that downcase titles.
@Article{ElEssaili:2015:QBC,
  author =       "Ali {El Essaili} and Zibin Wang and Eckehard Steinbach
                 and Liang Zhou",
  title =        "{QoE}-Based Cross-Layer Optimization for Uplink Video
                 Transmission",
  journal =      j-TOMM,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801124",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 28 06:14:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We study the problem of resource-efficient uplink
                 distribution of user-generated video content over
                 fourth-generation mobile networks. This is challenged
                 by (1) the capacity-limited and time-variant uplink
                 channel, (2) the resource-hungry upstreamed videos and
                 their dynamically changing complexity, and (3) the
                 different playout times of the video consumers. To
                 address these issues, we propose a systematic approach
                 for quality-of-experience (QoE)-based resource
                 optimization and uplink transmission of multiuser
                 generated video content. More specifically, we present
                 an analytical model for distributed scalable video
                 transmission at the mobile producers which considers
                 these constraints. This is complemented by a multiuser
                 cross-layer optimizer in the mobile network which
                 determines the transmission capacity for each mobile
                 terminal under current cell load and radio conditions.
                 Both optimal and low-complexity solutions are
                 presented. Simulation results for LTE uplink
                 transmission show that significant gains in perceived
                 video quality can be achieved by our cross-layer
                 resource optimization scheme. In addition, the
                 distributed optimization at the mobile producers can
                 further improve the user experience across the
                 different types of video consumers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}

@Article{Li:2015:CSN,
  author =       "Li-Jia Li and David A. Shamma and Xiangnan Kong and
                 Sina Jafarpour and Roelof {Van Zwol} and Xuanhui Wang",
  title =        "{CelebrityNet}: a Social Network Constructed from
                 Large-Scale Online Celebrity Images",
  journal =      j-TOMM,
  volume =       "12",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801125",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Fri Aug 28 06:14:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www