%% Raw TeX written ahead of the bibliography: loads bibnames.sty, supplies a
%% fallback \Thorn when none is defined, and declares an (empty) hyphenation
%% list.  The pieces joined with # are concatenated with no separator, so the
%% first piece ends in a space: that space terminates the \input file name
%% before the following \ifx can be expanded during file-name scanning.
@Preamble{
    "\input bibnames.sty " #
    "\ifx \Thorn \undefined \def \Thorn {T}\fi" #
    "\hyphenation{
    }"
}
@string{ack-nhfb = {Nelson H. F. Beebe,
    University of Utah,
    Department of Mathematics, 110 LCB,
    155 S 1400 E RM 233,
    Salt Lake City, UT 84112-0090, USA,
    Tel: +1 801 581 5254,
    FAX: +1 801 581 4148,
    e-mail: \path|beebe@math.utah.edu|,
    \path|beebe@acm.org|,
    \path|beebe@computer.org| (Internet),
    URL: \path|https://www.math.utah.edu/~beebe/|}}
@string{j-TOMCCAP = {ACM Transactions on Multimedia Computing,
    Communications, and Applications}}
@string{j-TOMM = {ACM Transactions on Multimedia Computing,
    Communications, and Applications}}
@article{Georganas:2005:EBA,
  author = {Nicolas D. Georganas},
  title = {{Editorial}: {The} birth of the {ACM Transactions on
    Multimedia Computing, Communications and Applications}
    {(TOMCCAP)}},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {1--2},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Rowe:2005:ASR,
  author = {Lawrence A. Rowe and Ramesh Jain},
  title = {{ACM SIGMM Retreat} report on future directions in
    multimedia research},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {3--13},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Jain:2005:GEI,
  author = {Ramesh Jain and Thomas Plagemann and Ralf Steinmetz},
  title = {Guest editorial: {The International ACM Multimedia
    Conference 1993} --- ten years after},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {14--15},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Teodosio:2005:SS,
  author = {Laura Teodosio and Walter Bender},
  title = {Salient stills},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {16--36},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Reddy:2005:DSM,
  author = {A. L. N. Reddy and Jim Wyllie and K. B. R.
    Wijayaratne},
  title = {Disk scheduling in a multimedia {I/O} system},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {37--59},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Buchanan:2005:ATL,
  author = {M. Cecelia Buchanan and Polle T. Zellweger},
  title = {Automatic temporal layout mechanisms revisited},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {60--88},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Bulterman:2005:SMA,
  author = {Dick C. A. Bulterman and Lynda Hardman},
  title = {Structured multimedia authoring},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {89--109},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Mayer-Patel:2005:BSM,
  author = {Ketan Mayer-Patel and Brian C. Smith and Lawrence A.
    Rowe},
  title = {The {Berkeley} software {MPEG-1} video decoder},
  journal = j-TOMCCAP,
  volume = {1},
  number = {1},
  pages = {110--125},
  month = feb,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Apr 14 11:01:03 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Plagemann:2005:SPA,
  author = {Thomas Plagemann and Prashant Shenoy and John R.
    Smith},
  title = {Selected papers from the {ACM Multimedia Conference
    2003}},
  journal = j-TOMCCAP,
  volume = {1},
  number = {2},
  pages = {127--127},
  month = may,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Kum:2005:RTM,
  author = {Sang-Uok Kum and Ketan Mayer-Patel},
  title = {Real-time multidepth stream compression},
  journal = j-TOMCCAP,
  volume = {1},
  number = {2},
  pages = {128--150},
  month = may,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Feng:2005:PSL,
  author = {Wu-Chi Feng and Ed Kaiser and Wu Chang Feng and Mikael
    Le Baillif},
  title = {{Panoptes}: scalable low-power video sensor networking
    technologies},
  journal = j-TOMCCAP,
  volume = {1},
  number = {2},
  pages = {151--167},
  month = may,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Goh:2005:SFD,
  author = {Kingshy Goh and Beitao Li and Edward Y. Chang},
  title = {Semantics and feature discovery via confidence-based
    ensemble},
  journal = j-TOMCCAP,
  volume = {1},
  number = {2},
  pages = {168--189},
  month = may,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Baker:2005:UPC,
  author = {H. Harlyn Baker and Nina Bhatti and Donald Tanguay and
    Irwin Sobel and Dan Gelb and Michael E. Goss and W.
    Bruce Culbertson and Thomas Malzbender},
  title = {Understanding performance in {Coliseum}, an immersive
    videoconferencing system},
  journal = j-TOMCCAP,
  volume = {1},
  number = {2},
  pages = {190--210},
  month = may,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Jul 7 13:52:13 MDT 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Adams:2005:IIM,
  author = {Brett Adams and Svetha Venkatesh and Ramesh Jain},
  title = {{IMCE}: {Integrated} media creation environment},
  journal = j-TOMCCAP,
  volume = {1},
  number = {3},
  pages = {211--247},
  month = aug,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Fri Nov 18 08:30:19 MST 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Poellabauer:2005:FCD,
  author = {Christian Poellabauer and Karsten Schwan},
  title = {Flexible cross-domain event delivery for
    quality-managed multimedia applications},
  journal = j-TOMCCAP,
  volume = {1},
  number = {3},
  pages = {248--268},
  month = aug,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Fri Nov 18 08:30:19 MST 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Cooper:2005:TEC,
  author = {Matthew Cooper and Jonathan Foote and Andreas
    Girgensohn and Lynn Wilcox},
  title = {Temporal event clustering for digital photo
    collections},
  journal = j-TOMCCAP,
  volume = {1},
  number = {3},
  pages = {269--288},
  month = aug,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Fri Nov 18 08:30:19 MST 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Li:2005:CEM,
  author = {Keqiu Li and Hong Shen},
  title = {Coordinated enroute multimedia object caching in
    transcoding proxies for tree networks},
  journal = j-TOMCCAP,
  volume = {1},
  number = {3},
  pages = {289--314},
  month = aug,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Fri Nov 18 08:30:19 MST 2005},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Wu:2005:AFE,
  author = {Huahui Wu and Mark Claypool and Robert Kinicki},
  title = {Adjusting forward error correction with temporal
    scaling for {TCP}-friendly streaming {MPEG}},
  journal = j-TOMCCAP,
  volume = {1},
  number = {4},
  pages = {315--337},
  month = nov,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Cai:2005:LUL,
  author = {Jianfei Cai and Xiangjun Li and Chang Wen Chen},
  title = {Layered unequal loss protection with pre-interleaving
    for fast progressive image transmission over
    packet-loss channels},
  journal = j-TOMCCAP,
  volume = {1},
  number = {4},
  pages = {338--353},
  month = nov,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Tu:2005:ASP,
  author = {Yi-Cheng Tu and Jianzhong Sun and Mohamed Hefeeda and
    Sunil Prabhakar},
  title = {An analytical study of peer-to-peer media streaming
    systems},
  journal = j-TOMCCAP,
  volume = {1},
  number = {4},
  pages = {354--376},
  month = nov,
  year = {2005},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Lew:2006:CBM,
  author = {Michael S. Lew and Nicu Sebe and Chabane Djeraba and
    Ramesh Jain},
  title = {Content-based multimedia information retrieval:
    {State} of the art and challenges},
  journal = j-TOMCCAP,
  volume = {2},
  number = {1},
  pages = {1--19},
  month = feb,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{DelBimbo:2006:CBR,
  author = {Alberto {Del Bimbo} and Pietro Pala},
  title = {Content-based retrieval of {$3$D} models},
  journal = j-TOMCCAP,
  volume = {2},
  number = {1},
  pages = {20--43},
  month = feb,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Xu:2006:FAF,
  author = {Huaxin Xu and Tat-Seng Chua},
  title = {Fusion of {AV} features and external information
    sources for event detection in team sports video},
  journal = j-TOMCCAP,
  volume = {2},
  number = {1},
  pages = {44--67},
  month = feb,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Joshi:2006:SPE,
  author = {Dhiraj Joshi and James Z. Wang and Jia Li},
  title = {The {Story Picturing Engine}---a system for automatic
    text illustration},
  journal = j-TOMCCAP,
  volume = {2},
  number = {1},
  pages = {68--89},
  month = feb,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Snoek:2006:LRS,
  author = {Cees G. M. Snoek and Marcel Worring and Alexander G.
    Hauptmann},
  title = {Learning rich semantics from news video archives by
    style analysis},
  journal = j-TOMCCAP,
  volume = {2},
  number = {2},
  pages = {91--108},
  month = may,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Yang:2006:SER,
  author = {Guang Yang and Tony Sun and Mario Gerla and M. Y.
    Sanadidi and Ling-Jyh Chen},
  title = {Smooth and efficient real-time video transport in the
    presence of wireless errors},
  journal = j-TOMCCAP,
  volume = {2},
  number = {2},
  pages = {109--126},
  month = may,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Shao:2006:ASM,
  author = {Xi Shao and Changsheng Xu and Namunu C. Maddage and Qi
    Tian and Mohan S. Kankanhalli and Jesse S. Jin},
  title = {Automatic summarization of music videos},
  journal = j-TOMCCAP,
  volume = {2},
  number = {2},
  pages = {127--148},
  month = may,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Eide:2006:RTV,
  author = {Viktor S. Wold Eide and Ole-Christoffer Granmo and
    Frank Eliassen and J{\o}rgen Andreas Michaelsen},
  title = {Real-time video content analysis: {QoS}-aware
    application composition and parallel processing},
  journal = j-TOMCCAP,
  volume = {2},
  number = {2},
  pages = {149--172},
  month = may,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Candan:2006:ISI,
  author = {K. Sel{\c{c}}uk Candan and Augusto Celentano and
    Wolfgang Klas},
  title = {Introduction to special issue on the use of context in
    multimedia information systems},
  journal = j-TOMCCAP,
  volume = {2},
  number = {3},
  pages = {173--176},
  month = aug,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Ferrara:2006:SWO,
  author = {Alfio Ferrara and Luca A. Ludovico and Stefano
    Montanelli and Silvana Castano and Goffredo Haus},
  title = {A {Semantic Web} ontology for context-based
    classification and retrieval of music resources},
  journal = j-TOMCCAP,
  volume = {2},
  number = {3},
  pages = {177--198},
  month = aug,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Arigon:2006:HMP,
  author = {Anne-Muriel Arigon and Anne Tchounikine and Maryvonne
    Miquel},
  title = {Handling multiple points of view in a multimedia data
    warehouse},
  journal = j-TOMCCAP,
  volume = {2},
  number = {3},
  pages = {199--218},
  month = aug,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% TOMCCAP volume 2, number 3 (August 2006), pages 219--240.
% The third author's surname is written "McDaniel", with an internal capital.
@Article{Kahol:2006:MCH,
  author = {Kanav Kahol and Priyamvada Tripathi and Troy McDaniel
    and Laura Bratton and Sethuraman Panchanathan},
  title = {Modeling context in haptic perception, rendering, and
    visualization},
  journal = j-TOMCCAP,
  volume = {2},
  number = {3},
  pages = {219--240},
  month = aug,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Thu Sep 7 16:13:26 MDT 2006},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Gulliver:2006:DUP,
  author = {Stephen R. Gulliver and Gheorghita Ghinea},
  title = {Defining user perception of distributed multimedia
    quality},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {241--257},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Gopalan:2006:SAC,
  author = {Kartik Gopalan and Lan Huang and Gang Peng and
    Tzi-Cker Chiueh and Yow-Jian Lin},
  title = {Statistical admission control using delay distribution
    measurements},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {258--281},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Li:2006:MSP,
  author = {H. Li and M. Li and B. Prabhakaran},
  title = {Middleware for streaming {$3$D} progressive meshes
    over lossy networks},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {282--317},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Etsion:2006:PPU,
  author = {Yoav Etsion and Dan Tsafrir and Dror G. Feitelson},
  title = {Process prioritization using output production:
    {Scheduling} for multimedia},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {318--342},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Cesar:2006:GAH,
  author = {Pablo Cesar and Petri Vuorimaa and Juha Vierinen},
  title = {A graphics architecture for high-end interactive
    television terminals},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {343--357},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Madhwacharyula:2006:MHV,
  author = {Chitra L. Madhwacharyula and Marc Davis and Philippe
    Mulhem and Mohan S. Kankanhalli},
  title = {Metadata handling: a video perspective},
  journal = j-TOMCCAP,
  volume = {2},
  number = {4},
  pages = {358--388},
  month = nov,
  year = {2006},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% Article-numbered paper (articleno 2) in volume 3, number 1.  Pages are given
% as "articleno:first--articleno:last"; the last page is unknown, hence "??".
@Article{Atrey:2007:GOO,
  author = {Pradeep K. Atrey and Mohan S. Kankanhalli and John B.
    Oommen},
  title = {Goal-oriented optimal subset selection of correlated
    multimedia streams},
  journal = j-TOMCCAP,
  volume = {3},
  number = {1},
  pages = {2:1--2:??},
  month = feb,
  year = {2007},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {2},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% Article-numbered paper (articleno 6) in volume 3, number 1.  Pages are given
% as "articleno:first--articleno:last"; the last page is unknown, hence "??".
@Article{Chen:2007:DSI,
  author = {Datong Chen and Jie Yang and Robert Malkin and Howard
    D. Wactlar},
  title = {Detecting social interactions of the elderly in a
    nursing home environment},
  journal = j-TOMCCAP,
  volume = {3},
  number = {1},
  pages = {6:1--6:??},
  month = feb,
  year = {2007},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {6},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% Article-numbered paper (articleno 4) in volume 3, number 1.  Pages are given
% as "articleno:first--articleno:last"; the last page is unknown, hence "??".
@Article{Heck:2007:VV,
  author = {Rachel Heck and Michael Wallick and Michael Gleicher},
  title = {Virtual videography},
  journal = j-TOMCCAP,
  volume = {3},
  number = {1},
  pages = {4:1--4:??},
  month = feb,
  year = {2007},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {4},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% Article-numbered paper (articleno 3) in volume 3, number 1.  Pages are given
% as "articleno:first--articleno:last"; the last page is unknown, hence "??".
@Article{Truong:2007:VAS,
  author = {Ba Tu Truong and Svetha Venkatesh},
  title = {Video abstraction: a systematic review and
    classification},
  journal = j-TOMCCAP,
  volume = {3},
  number = {1},
  pages = {3:1--3:??},
  month = feb,
  year = {2007},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {3},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
% Article-numbered paper (articleno 1) in volume 3, number 1.  Pages are given
% as "articleno:first--articleno:last"; the last page is unknown, hence "??".
@Article{Xu:2007:CAD,
  author = {Changsheng Xu and Namunu C. Maddage and Xi Shao and Qi
    Tian},
  title = {Content-adaptive digital music watermarking based on
    music structure analysis},
  journal = j-TOMCCAP,
  volume = {3},
  number = {1},
  pages = {1:1--1:??},
  month = feb,
  year = {2007},
  CODEN = {????},
  ISSN = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate = {Sat Apr 14 11:19:17 MDT 2007},
  bibsource = {http://www.acm.org/pubs/contents/journals/tomccap/;
    https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno = {1},
  fjournal = {ACM Transactions on Multimedia Computing,
    Communications, and Applications},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Yan:2007:MSO,
author = "Wei-Qi Yan and Mohan S. Kankanhalli",
title = "Multimedia simplification for optimized {MMS}
synthesis",
journal = j-TOMCCAP,
volume = "3",
number = "1",
pages = "??--??",
month = feb,
year = "2007",
CODEN = "????",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Sat Apr 14 11:19:17 MDT 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "5",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2007:CAT,
author = "Tiecheng Liu and John R. Kender",
title = "Computational approaches to temporal sampling of video
sequences",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "7:1--7:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230813",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Video key frame extraction is one of the most
important research problems for video summarization,
indexing, and retrieval. For a variety of applications
such as ubiquitous media access and video streaming,
the temporal boundaries between video key frames are
required for synchronizing visual content with audio.
In this article, we define temporal video sampling as a
unified process of extracting video key frames and
computing their temporal boundaries, and formulate it
as an optimization problem. We first provide an optimal
approach that minimizes temporal video sampling error
using a dynamic programming process. The optimal
approach retrieves a key frame hierarchy and all
temporal boundaries in $ O(n^4) $ time and $ O(n^2) $
space. To further reduce computational complexity, we
also provide a suboptimal greedy algorithm that
exploits the data structure of a binary heap and uses a
novel ``look-ahead'' computational technique, enabling
all levels of key frames to be extracted with an
average-case computational time of $ O(n \log n) $ and
memory usage of $ O(n) $. Both the optimal and the
greedy methods are free of parameters, thus avoiding
the threshold-selection problem that exists in other
approaches. We empirically compare the proposed optimal
and greedy methods with several existing methods in
terms of video sampling error, computational cost, and
subjective quality. An evaluation of eight videos of
different genres shows that the greedy approach
achieves performance very close to that of the optimal
approach while drastically reducing computational cost,
making it suitable for processing long video sequences
in large video databases.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "7",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "key frame selection; temporal video sampling;
ubiquitous media access; video content analysis; video
summarization",
}
@Article{Moncrieff:2007:OAB,
author = "Simon Moncrieff and Svetha Venkatesh and Geoff West",
title = "Online audio background determination for complex
audio environments",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "8:1--8:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230814",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We present a method for foreground/background
separation of audio using a background modelling
technique. The technique models the background in an
online, unsupervised, and adaptive fashion, and is
designed for application to long term surveillance and
monitoring problems. The background is determined using
a statistical method to model the states of the audio
over time. In addition, three methods are used to
increase the accuracy of background modelling in
complex audio environments. Such environments can cause
the failure of the statistical model to accurately
capture the background states. An entropy-based
approach is used to unify background representations
fragmented over multiple states of the statistical
model. The approach successfully unifies such
background states, resulting in a more robust
background model. We adaptively adjust the number of
states considered background according to background
complexity, resulting in the more accurate
classification of background models. Finally, we use an
auxiliary model cache to retain potential background
states in the system. This prevents the deletion of
such states due to a rapid influx of observed states
that can occur for highly dynamic sections of the audio
signal. The separation algorithm was successfully
applied to a number of audio environments representing
monitoring applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "8",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "audio analysis; online background modelling;
surveillance and monitoring",
}
@Article{Oshima:2007:PDS,
author = "Chika Oshima and Kazushi Nishimoto and Norihiro
Hagita",
title = "A piano duo support system for parents to lead
children to practice musical performances",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "9:1--9:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230815",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we propose ``Family Ensemble,'' a
piano duo support system for a musically inept parent
and his/her child who is a beginner at playing the
piano. The system makes it easier for parents to
correctly reproduce a given sequence of pitches along
with the child's performance by using score tracking
and note-replacement functions. The experiments with
this support system showed that the parents can
immediately participate in the piano duo. Furthermore,
we found that during joint practices using Family
Ensemble some subjects discussed musical ideas that
they would not have talked about without using the
system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "9",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "entertainment; musical expression; piano duo; score
tracking; support system",
}
@Article{He:2007:CSW,
author = "Xiaofei He and Deng Cai and Ji-Rong Wen and Wei-Ying
Ma and Hong-Jiang Zhang",
title = "Clustering and searching {WWW} images using link and
page layout analysis",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "10:1--10:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230816",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Due to the rapid growth of the number of digital
images on the Web, there is an increasing demand for an
effective and efficient method for organizing and
retrieving the available images. This article describes
iFind, a system for clustering and searching WWW
images. By using a vision-based page segmentation
algorithm, a Web page is partitioned into blocks, and
the textual and link information of an image can be
accurately extracted from the block containing that
image. The textual information is used for image
indexing. By extracting the page-to-block,
block-to-image, block-to-page relationships through
link structure and page layout analysis, we construct
an image graph. Our method is less sensitive to noisy
links than previous methods like PageRank, HITS, and
PicASHOW, and hence the image graph can better reflect
the semantic relationship between images. Using the
notion of Markov Chain, we can compute the limiting
probability distributions of the images, ImageRanks,
which characterize the importance of the images. The
ImageRanks are combined with the relevance scores to
produce the final ranking for image search. With the
graph models, we can also use techniques from spectral
graph theory for image clustering and embedding, or 2-D
visualization. Some experimental results on 11.6
million images downloaded from the Web are provided in
the article.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "10",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "image clustering; image search; link analysis; Web
mining",
}
@Article{Jung:2007:NBA,
author = "Byunghee Jung and Junehwa Song and Yoonjoon Lee",
title = "A narrative-based abstraction framework for
story-oriented video",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "11:1--11:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230817",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article proposes a novel video abstraction
framework for online review services of story-oriented
videos such as dramas. Among the many genres of TV
programs, a drama is one of the most popularly watched
on the Web. The abstracts generated by the proposed
framework not only give a summary of a video but also
effectively help viewers understand the overall story.
In addition, our method is duration-flexible. We get
clues about human understanding of a story from
scenario writing rules and editorial techniques that
are popularly used in the process of video production
to explicitly express a narrative, and propose a new
video abstraction model, called a Narrative Abstraction
Model. The model effectively captures the narrative
structure embedded in a story-oriented video and
articulates the progress of the story in a weighted
directed graph, called a Narrative Structure Graph
(NSG). The model provides a basis for a flexible
framework for abstract generation using the NSG as the
intermediary representation of a video. Different
abstracts can be appropriately generated based upon
different user requirements. To show the effectiveness
of the proposed model and method, we developed a video
abstraction system realizing the framework, and
successfully applied it to large volumes of TV dramas.
The evaluation results show that the proposed framework
is a feasible solution for online review services.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "11",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "film; narrative structure; online review services;
story understanding; story-oriented; video abstraction;
video abstraction system",
}
@Article{Shacham:2007:UDP,
author = "Ron Shacham and Henning Schulzrinne and Srisakul
Thakolsri and Wolfgang Kellerer",
title = "Ubiquitous device personalization and use: {The} next
generation of {IP} multimedia communications",
journal = j-TOMCCAP,
volume = "3",
number = "2",
pages = "12:1--12:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1230812.1230818",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:04 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Service usage in emerging ubiquitous environments
includes seamless and personalized usage of public and
private devices discovered in the vicinity of a user.
In our work, we describe an architecture for device
discovery, device configuration, and the transfer of
active sessions between devices. The presented
architecture uses the Session Initiation Protocol (SIP)
as a standardized, widely used signaling protocol for
IP-based multimedia services. Our solution includes
support of simple existing devices, split of sessions
between devices, user-control of location-based
behavior, and handling of security and privacy
concerns. We present the implementation and show the
feasibility of our work with analytical evaluation and
measurements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "12",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Internet multimedia; location-based services; mobile
communications; ubiquitous computing",
}
@Article{Chen:2007:EMO,
author = "Herng-Yow Chen and Sheng-Wei Li",
title = "Exploring many-to-one speech-to-text correlation for
{Web}-based language learning",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "13:1--13:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236472",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article investigates the correlations between
multimedia objects (particularly speech and text)
involved in language lectures in order to design an
effective presentation mechanism for web-based
learning. The cross-media correlations are classified
into implicit relations (retrieved by computing) and
explicit relations (recorded during the preprocessing
stage). The implicit temporal correlation between
speech and text is primarily to help to negotiate
supplementary lecture navigations like tele-pointer
movement, lips-sync movement, and content scrolling. We
propose a speech-text alignment framework, using an
iterative algorithm based on local alignment, to probe
many-to-one temporal correlations, and not the
one-to-one only. The proposed framework is a more
practical method for analyzing general language
lectures, and the algorithm's time complexity conforms
to the best-possible computation cost, $ O(n m) $, without
introducing additional computation. In addition, we
have shown the feasibility of creating vivid
presentations by exploiting implicit relations and
artificially simulating some explicit media. To
facilitate the navigation of integrated multimedia
documents, we develop several visualization techniques
for describing media correlations, including guidelines
for speech-text correlations, visible-automatic
scrolling, and levels of detail of timeline, to provide
intuitive and easy-to-use random access mechanisms. We
evaluated the performance of the analysis method and
human perceptions of the synchronized presentation. The
overall performance of the analysis method is that
about 99.5\% of the words analyzed are of a temporal
error within 0.5 sec and the subjective evaluation
result shows that the synchronized presentation is
highly acceptable to human beings.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "13",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "analysis and presentation; computed synchronization;
cross-media correlation; lips sync; speech-to-text
alignment",
}
@Article{Wang:2007:EST,
author = "Surong Wang and Manoranjan Dash and Liang-Tien Chia
and Min Xu",
title = "Efficient sampling of training set in large and noisy
multimedia data",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "14:1--14:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236473",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "As the amount of multimedia data is increasing
day-by-day thanks to less expensive storage devices and
increasing numbers of information sources, machine
learning algorithms are faced with large-sized and
noisy datasets. Fortunately, the use of a good sampling
set for training influences the final results
significantly. But using a simple random sample (SRS)
may not obtain satisfactory results because such a
sample may not adequately represent the large and noisy
dataset due to its blind approach in selecting samples.
The difficulty is particularly apparent for huge
datasets where, due to memory constraints, only very
small sample sizes are used. This is typically the case
for multimedia applications, where data size is usually
very large. In this article we propose a new and
efficient method to sample large and noisy
multimedia data. The proposed method is based on a
simple distance measure that compares the histograms of
the sample set and the whole set in order to estimate
the representativeness of the sample. The proposed
method deals with noise in an elegant manner which SRS
and other methods are not able to deal with. We
experiment on image and audio datasets. Comparison with
SRS and other methods shows that the proposed method is
vastly superior in terms of sample representativeness,
particularly for small sample sizes although time-wise
it is comparable to SRS, the least expensive method in
terms of time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "14",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "audio event identification; histogram; image
classification; noise; sampling",
}
@Article{Zhou:2007:CCO,
author = "Suiping Zhou and Wentong Cai and Stephen J. Turner and
Bu-Sung Lee and Junhu Wei",
title = "Critical causal order of events in distributed virtual
environments",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "15:1--15:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236474",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We investigate the causal order of events in
distributed virtual environments (DVEs). We first
define the critical causal order relation among the
events. Then, we propose some mechanisms to enhance the
prevalent RO (receive order delivery) mechanism in DVEs
so that the real-time property of DVEs is preserved
while the critical causal order violations are reduced.
These mechanisms are implemented as a middleware.
Experimental results show that the middleware performs
well in reducing the critical causality violations in
simulation and incurs little processing overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "15",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "causal order; distributed simulation; virtual
environments",
}
@Article{Li:2007:SRM,
author = "Chuanjun Li and S. Q. Zheng and B. Prabhakaran",
title = "Segmentation and recognition of motion streams by
similarity search",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "16:1--16:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236475",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Fast and accurate recognition of motion data streams
from gesture sensing and motion capture devices has
many applications and is the focus of this article.
Based on the analysis of the geometric structures
revealed by singular value decompositions (SVD) of
motion data, a similarity measure is proposed for
simultaneously segmenting and recognizing motion
streams. A direction identification approach is
explored to further differentiate motions with similar
data geometric structures. Experiments show that the
proposed similarity measure can segment and recognize
motion streams of variable lengths with high accuracy,
without knowing beforehand the number of motions in a
stream.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "16",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "gesture recognition; motion capture; pattern analysis;
principal component analysis; segmentation; similarity
measures; singular value decomposition",
}
@Article{Ott:2007:OAT,
author = "David E. Ott and Ketan Mayer-Patel",
title = "An open architecture for transport-level protocol
coordination in distributed multimedia applications",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "17:1--17:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236476",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We consider the problem of flow coordination in
distributed multimedia applications. Most
transport-level protocols are designed to operate
independently and lack mechanisms for sharing
information with other flows and coordinating data
transport in various ways. This limitation becomes
problematic in distributed applications that employ
numerous flows between two computing clusters sharing
the same intermediary forwarding path across the
Internet. In this article, we propose an open
architecture that supports the sharing of network state
information, peer flow information, and
application-specific information. Called simply the
coordination protocol (CP), the scheme facilitates
coordination of network resource usage across flows
belonging to the same application, as well as aiding
other types of coordination. The effectiveness of our
approach is illustrated in the context of
multistreaming in 3D tele-immersion where consistency
of network information across flows both greatly
improves frame transport synchrony and minimizes
buffering delay.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "17",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "distributed applications; flow coordination; network
protocols",
}
@Article{Sakr:2007:RCB,
author = "Ziad Sakr and Nicolas D. Georganas",
title = "Robust content-based {MPEG}-4 {XMT} scene structure
authentication and multimedia content location",
journal = j-TOMCCAP,
volume = "3",
number = "3",
pages = "18:1--18:??",
month = aug,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1236471.1236477",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:10:32 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "For the past decade, there have been numerous research
works focusing on the protection of digital images,
audio, video, 3D virtual scenes, and software data from
unauthorized use and distribution. With the emerging
technology of the MPEG-4 standard, MPEG-4 scenes that
may include images, video, audio, and 3D objects can
easily be built using the text-based MPEG-4 XMT
standard. XMT allows content authors to exchange their
content with other authors, tools, or service providers
and facilitates interoperability with MPEG-4, X3D, and
SMIL. In order for owners and designers to protect
and/or authenticate their work, some form of security
needs to be applied into the MPEG-4 XMT structure and
its media content. Unlike images or videos,
watermarking an XMT structure is not an easy task,
since the structure contains no noise components to
embed the watermark. This article is the first one
proposing a novel robust algorithm for the
authentication of a given MPEG-4 XMT structured scene
and the location of its multimedia content.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "18",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "MPEG-4; multimedia; polynomial; pseudorandom
sequences; steganography; VRML; watermarking; XML;
XMT",
}
@Article{Ghinea:2007:ISI,
author = "Gheorghita Ghinea and Chabane Djeraba and Stephen
Gulliver and Kara Pernice Coyne",
title = "Introduction to special issue on eye-tracking
applications in multimedia systems",
journal = j-TOMCCAP,
volume = "3",
number = "4",
pages = "1:1--1:4",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1314303.1314304",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:11:20 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Colombo:2007:RTR,
author = "Carlo Colombo and Dario Comanducci and Alberto {Del
Bimbo}",
title = "Robust tracking and remapping of eye appearance with
passive computer vision",
journal = j-TOMCCAP,
volume = "3",
number = "4",
pages = "2:1--2:20",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1314303.1314305",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:11:20 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "A single-camera iris-tracking and remapping approach
based on passive computer vision is presented. Tracking
is aimed at obtaining accurate and robust measurements
of the iris/pupil position. To this purpose, a robust
method for ellipse fitting is used, employing search
constraints so as to achieve better performance with
respect to the standard RANSAC algorithm. Tracking also
embeds an iris localization algorithm (working as a
bootstrap multiple-hypotheses generation step), and a
blink detector that can detect voluntary eye blinks in
human-computer interaction applications. On-screen
remapping incorporates a head-tracking method capable
of compensating for small user-head movements. The
approach operates in real time under different light
conditions and in the presence of distractors. An
extensive set of experiments is presented and
discussed. In particular, an evaluation method for the
choice of layout of both hardware components and
calibration points is described. Experiments also
investigate the importance of providing a visual
feedback to the user, and the benefits gained from
performing head compensation, especially during
image-to-screen map calibration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "eye blink detection; eye tracking and remapping;
eye-driven human-computer interaction; robust fitting",
}
@Article{Wang:2007:UGP,
author = "Jun Wang and Lijun Yin and Jason Moore",
title = "Using geometric properties of topographic manifold to
detect and track eyes for human-computer interaction",
journal = j-TOMCCAP,
volume = "3",
number = "4",
pages = "3:1--3:20",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1314303.1314306",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:11:20 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Automatic eye detection and tracking is an important
component for advanced human-computer interface design.
Accurate eye localization can help develop a successful
system for face recognition and emotion identification.
In this article, we propose a novel approach to detect
and track eyes using geometric surface features on
topographic manifold of eye images. First, in the joint
spatial-intensity domain, a facial image is treated as
a 3D terrain surface or image topographic manifold. In
particular, eye regions exhibit certain intrinsic
geometric traits on this topographic manifold, namely,
the pit-labeled center and hillside-like surround
regions. Applying a terrain classification procedure on
the topographic manifold of facial images, each
location of the manifold can be labeled to generate a
terrain map. We use the distribution of terrain labels
to represent the eye terrain pattern. The Bhattacharyya
affinity is employed to measure the distribution
similarity between two topographic manifolds. Based on
the Bhattacharyya kernel, a support vector machine is
applied for selecting proper eye pairs from the
pit-labeled candidates. Second, given detected eyes on
the first frame of a video sequence, a
mutual-information-based fitting function is defined to
describe the similarity between two terrain surfaces of
neighboring frames. By optimizing the fitting function,
eye locations are updated for subsequent frames. The
distinction of the proposed approach lies in that both
eye detection and eye tracking are performed on the
derived topographic manifold, rather than on an
original-intensity image domain. The robustness of the
approach is demonstrated under various imaging
conditions and with different facial appearances, using
both static images and video sequences without
background constraints.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "3",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Bhattacharyya affinity; eye detection; eye tracking;
mutual information; topographic manifold",
}
@Article{Agrafiotis:2007:TEC,
  author          = "D. Agrafiotis and S. J. C. Davies and N. Canagarajah
                     and D. R. Bull",
  title           = "Towards efficient context-specific video coding based
                     on gaze-tracking analysis",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "4:1--4:15",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314307",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "This article discusses a framework for model-based,
                     context-dependent video coding based on exploitation of
                     characteristics of the human visual system. The system
                     utilizes variable-quality coding based on priority maps
                     which are created using mostly context-dependent rules.
                     The technique is demonstrated through two case studies
                     of specific video context, namely open signed content
                     and football sequences. Eye-tracking analysis is
                     employed for identifying the characteristics of each
                     context, which are subsequently exploited for coding
                     purposes, either directly or through a gaze prediction
                     model. The framework is shown to achieve a considerable
                     improvement in coding efficiency.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "4",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "applications; context-based video coding; eye
                     tracking; multimedia perceptual quality; subjective
                     video quality; transformation of eye movements into
                     useful knowledge",
}
@Article{Urruty:2007:DEF,
  author          = "Thierry Urruty and Stanislas Lew and Nacim Ihadaddene
                     and Dan A. Simovici",
  title           = "Detecting eye fixations by projection clustering",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "5:1--5:20",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314308",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Eye movements are certainly the most natural and
                     repetitive movement of a human being. The most mundane
                     activity, such as watching television or reading a
                     newspaper, involves this automatic activity which
                     consists of shifting our gaze from one point to
                     another.\par
                     Identification of the components of eye movements
                     (fixations and saccades) is an essential part in the
                     analysis of visual behavior because these types of
                     movements provide the basic elements used by further
                     investigations of human vision.\par
                     However, many of the algorithms that detect fixations
                     present a number of problems. In this article, we
                     present a new fixation identification technique that is
                     based on clustering of eye positions, using projections
                     and projection aggregation applied to static pictures.
                     We also present a new method that computes dispersion
                     of eye fixations in videos considering a multiuser
                     environment.\par
                     To demonstrate the performance and usefulness of our
                     approach we discuss our experimental work with two
                     different applications: on fixed image and video.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "5",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "eye fixations; interaction modeling; projected
                     clustering; static pictures; videos",
}
@Article{Duchowski:2007:FGC,
  author          = "Andrew T. Duchowski and Arzu {\c{C}}{\"o}ltekin",
  title           = "Foveated gaze-contingent displays for peripheral {LOD}
                     management, {$3$D} visualization, and stereo imaging",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "6:1--6:18",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314309",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Advancements in graphics hardware have allowed
                     development of hardware-accelerated imaging displays.
                     This article reviews techniques for real-time
                     simulation of arbitrary visual fields over still images
                     and video. The goal is to provide the vision sciences
                     and perceptual graphics communities techniques for the
                     investigation of fundamental processes of visual
                     perception. Classic gaze-contingent displays used for
                     these purposes are reviewed and for the first time a
                     pixel shader is introduced for display of a
                     high-resolution window over peripherally degraded
                     stimulus. The pixel shader advances current
                     state-of-the-art by allowing real-time processing of
                     still or streamed images, obviating the need for
                     preprocessing or storage.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "6",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "eye tracking; foveation; gaze-contingent displays;
                     level-of-detail",
}
@Article{Loschky:2007:HLC,
  author          = "Lester C. Loschky and Gary S. Wolverton",
  title           = "How late can you update gaze-contingent
                     multiresolutional displays without detection?",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "7:1--7:10",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314310",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "This study investigated perceptual disruptions in
                     gaze-contingent multiresolutional displays (GCMRDs) due
                     to delays in updating the center of highest resolution
                     after an eye movement. GCMRDs can be used to save
                     processing resources and transmission bandwidth in many
                     types of single-user display applications, such as
                     virtual reality, video-telephony, simulators, and
                     remote piloting. The current study found that image
                     update delays as late as 60 ms after an eye movement
                     did not significantly increase the detectability of
                     image blur and/or motion transients due to the update.
                     This is good news for designers of GCMRDs, since 60 ms
                     is ample time to update many GCMRDs after an eye
                     movement without disrupting perception. The study also
                     found that longer eye movements led to greater blur
                     and/or transient detection due to moving the eyes
                     further into the low-resolution periphery, effectively
                     reducing the image resolution at fixation prior to the
                     update. In GCMRD applications where longer saccades are
                     more likely (e.g., displays with relatively large
                     distances between objects), this problem could be
                     overcome by increasing the size of the region of
                     highest resolution.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "7",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "area of interest; bandwidth; blur detection; contrast
                     thresholds; display updates; eye movements; eye
                     tracking; foveated; foveation; gaze-contingent;
                     level-of-detail; multiresolution; perceptual
                     compression; peripheral vision; saccades; saccadic
                     suppression; visual perception",
}
@Article{Murray:2007:AEG,
  author          = "Norman Murray and Dave Roberts and Anthony Steed and
                     Paul Sharkey and Paul Dickerson and John Rae",
  title           = "An assessment of eye-gaze potential within immersive
                     virtual environments",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "8:1--8:17",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314311",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "In collaborative situations, eye gaze is a critical
                     element of behavior which supports and fulfills many
                     activities and roles. In current computer-supported
                     collaboration systems, eye gaze is poorly supported.
                     Even in a state-of-the-art video conferencing system
                     such as the access grid, although one can see the face
                     of the user, much of the communicative power of eye
                     gaze is lost. This article gives an overview of some
                     preliminary work that looks towards integrating eye
                     gaze into an immersive collaborative virtual
                     environment and assessing the impact that this would
                     have on interaction between the users of such a system.
                     Three experiments were conducted to assess the efficacy
                     of eye gaze within immersive virtual environments. In
                     each experiment, subjects observed on a large screen
                     the eye-gaze behavior of an avatar. The eye-gaze
                     behavior of that avatar had previously been recorded
                     from a user with the use of a head-mounted eye tracker.
                     The first experiment was conducted to assess the
                     difference between users' abilities to judge what
                     objects an avatar is looking at with only head gaze
                     being viewed and also with eye- and head-gaze data
                     being displayed. The results from the experiment show
                     that eye gaze is of vital importance to the subjects,
                     correctly identifying what a person is looking at in an
                     immersive virtual environment. The second experiment
                     examined whether a monocular or binocular eye-tracker
                     would be required. This was examined by testing
                     subjects' ability to identify where an avatar was
                     looking from their eye direction alone, or by eye
                     direction combined with convergence. This experiment
                     showed that convergence had a significant impact on the
                     subjects' ability to identify where the avatar was
                     looking. The final experiment looked at the effects of
                     stereo and mono-viewing of the scene, with the subjects
                     being asked to identify where the avatar was looking.
                     This experiment showed that there was no difference in
                     the subjects' ability to detect where the avatar was
                     gazing. This is followed by a description of how the
                     eye-tracking system has been integrated into an
                     immersive collaborative virtual environment and some
                     preliminary results from the use of such a system.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "8",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "eye gaze; immersive virtual environments",
}
@Article{Rachovides:2007:CIM,
  author          = "Dorothy Rachovides and James Walkerdine and Peter
                     Phillips",
  title           = "The conductor interaction method",
  journal         = j-TOMCCAP,
  volume          = "3",
  number          = "4",
  pages           = "9:1--9:23",
  month           = dec,
  year            = "2007",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1314303.1314312",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:11:20 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Computers have increasingly become part of our
                     everyday lives, with many activities either involving
                     their direct use or being supported by one. This has
                     prompted research into developing methods and
                     mechanisms to assist humans in interacting with
                     computers (human-computer interaction, or HCI). A
                     number of HCI techniques have been developed over the
                     years, some of which are quite old but continue to be
                     used, and some more recent and still evolving. Many of
                     these interaction techniques, however, are not natural
                     in their use and typically require the user to learn a
                     new means of interaction. Inconsistencies within these
                     techniques and the restrictions they impose on user
                     creativity can also make such interaction techniques
                     difficult to use, especially for novice users.\par
                     This article proposes an alternative interaction
                     method, the conductor interaction method (CIM), which
                     aims to provide a more natural and easier-to-learn
                     interaction technique. This novel interaction method
                     extends existing HCI methods by drawing upon techniques
                     found in human-human interaction. It is argued that the
                     use of a two-phased multimodal interaction mechanism,
                     using gaze for selection and gesture for manipulation,
                     incorporated within a metaphor-based environment, can
                     provide a viable alternative for interacting with a
                     computer (especially for novice users). Both the model
                     and an implementation of the CIM within a system are
                     presented in this article. This system formed the basis
                     of a number of user studies that have been performed to
                     assess the effectiveness of the CIM, the findings of
                     which are discussed in this work.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "9",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "gaze- and gesture-based interfaces; human-computer
                     interaction",
}
@Article{Luo:2008:IFH,
  author          = "Hangzai Luo and Yuli Gao and Xiangyang Xue and Jinye
                     Peng and Jianping Fan",
  title           = "Incorporating feature hierarchy and boosting to
                     achieve more effective classifier training and
                     concept-oriented video summarization and skimming",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "1:1--1:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324288",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "For online medical education purposes, we have
                     developed a novel scheme to incorporate the results of
                     semantic video classification to select the most
                     representative video shots for generating
                     concept-oriented summarization and skimming of surgery
                     education videos. First, salient objects are used as
                     the video patterns for feature extraction to achieve a
                     good representation of the intermediate video
                     semantics. The salient objects are defined as the
                     salient video compounds that can be used to
                     characterize the most significant perceptual properties
                     of the corresponding real world physical objects in a
                     video, and thus the appearances of such salient objects
                     can be used to predict the appearances of the relevant
                     semantic video concepts in a specific video domain.
                     Second, a novel multi-modal boosting algorithm is
                     developed to achieve more reliable video classifier
                     training by incorporating feature hierarchy and
                     boosting to dramatically reduce both the training cost
                     and the size of training samples, thus it can
                     significantly speed up SVM (support vector machine)
                     classifier training. In addition, the unlabeled samples
                     are integrated to reduce the human efforts on labeling
                     large amount of training samples. Finally, the results
                     of semantic video classification are incorporated to
                     enable concept-oriented video summarization and
                     skimming. Experimental results in a specific domain of
                     surgery education videos are provided.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "1",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "concept-oriented video skimming; feature hierarchy;
                     multi-modal boosting; salient objects; semantic video
                     classification; unlabeled samples",
}
@Article{Hefeeda:2008:RDO,
  author          = "Mohamed Hefeeda and Cheng-Hsin Hsu",
  title           = "Rate-distortion optimized streaming of fine-grained
                     scalable video sequences",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "2:1--2:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324289",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "We present optimal schemes for allocating bits of
                     fine-grained scalable video sequences among multiple
                     senders streaming to a single receiver. This allocation
                     problem is critical in optimizing the perceived quality
                     in peer-to-peer and distributed multi-server streaming
                     environments. Senders in such environments are
                     heterogeneous in their outgoing bandwidth and they hold
                     different portions of the video stream. We first
                     formulate and optimally solve the problem for
                     individual frames, then we generalize to the multiple
                     frame case. Specifically, we formulate the allocation
                     problem as an optimization problem, which is nonlinear
                     in general. We use rate-distortion models in the
                     formulation to achieve the minimum distortion in the
                     rendered video, constrained by the outgoing bandwidth
                     of senders, availability of video data at senders, and
                     incoming bandwidth of receiver. We show how the adopted
                     rate-distortion models transform the nonlinear problem
                     to an integer linear programming (ILP) problem. We then
                     design a simple rounding scheme that transforms the ILP
                     problem to a linear programming (LP) one, which can be
                     solved efficiently using common optimization techniques
                     such as the Simplex method. We prove that our rounding
                     scheme always produces a feasible solution, and the
                     solution is within a negligible margin from the optimal
                     solution. We also propose a new algorithm (FGSAssign)
                     for the single-frame allocation problem that runs in $
                     O(n \log n) $ steps, where $n$ is the number of senders.
                     We prove that FGSAssign is optimal. Furthermore, we
                     propose a heuristic algorithm (mFGSAssign) that
                     produces near-optimal solutions for the multiple-frame
                     case, and runs an order of magnitude faster than the
                     optimal one. Because of its short running time,
                     mFGSAssign can be used in real time. Our experimental
                     study validates our analytical analysis and shows the
                     effectiveness of our allocation algorithms in improving
                     the video quality.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "2",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "distributed streaming; FGS; fine-grained scalable
                     streaming; peer-to-peer streaming; rate-distortion
                     models; rate-distortion optimized streaming; video
                     streaming",
}
@Article{Babich:2008:VQE,
  author          = "Fulvio Babich and Marco D'Orlando and Francesca
                     Vatta",
  title           = "Video quality estimation in wireless {IP} networks:
                     {Algorithms} and applications",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "3:1--3:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324290",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "This article proposes three methods to estimate the
                     distortion deriving from packet losses in wireless
                     video communication. The proposed methods take into
                     account the short-term properties of the encoded video
                     sequences. A suitable set of functions is adopted to
                     model the distortion envelope resulting from multiple
                     losses. The estimated performance is compared with the
                     actual distortion, evaluated by decoding the received
                     sequence with a properly designed decoder. Numerical
                     results confirm the accuracy of the proposed models in
                     approximating the actual Mean Square Error (MSE) for a
                     wide range of loss rates. Some applications of the
                     proposed algorithms are presented.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "3",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "distortion estimation; error-concealment;
                     error-resilience; H.264; packet loss rate; real time
                     video; wireless networks",
}
@Article{Kotharu:2008:PQR,
  author          = "Phani S. Kotharu and B. Prabhakaran",
  title           = "Partial query resolution for animation authoring",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "4:1--4:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324291",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Animations are a part of multimedia and techniques
                     such as motion mapping and inverse kinematics aid in
                     reusing models and motion sequences to create new
                     animations. This reuse approach is facilitated by the
                     use of content-based retrieval techniques that often
                     require fuzzy query resolution. Most fuzzy query
                     resolution approaches work on all the attributes of the
                     query to minimize the database access cost thus
                     resulting in an unsatisfactory result set. It turns out
                     that the query resolution can be carried out in a
                     partial manner to achieve user satisfactory results and
                     aid in easy authoring. In this article, we present two
                     partial fuzzy query resolution approaches, one that
                     results in high-quality animations and the other that
                     produces results with decreasing number of satisfied
                     conditions in the query.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "4",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "aggregation function; animation toolkit; fuzzy query;
                     multimedia authoring; partial ordering; top-k query",
}
@Article{Ip:2008:RRS,
  author          = "Alan T. S. Ip and John C. S. Lui and Jiangchuan Liu",
  title           = "A revenue-rewarding scheme of providing incentive for
                     cooperative proxy caching for media streaming systems",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "5:1--5:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324292",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Network entities cooperating together can improve
                     system performance of media streaming. In this paper,
                     we address the ``incentive issue'' of a cooperative
                     proxy caching system and how to motivate each proxy to
                     provide cache space to the system. To encourage proxies
                     to participate, we propose a ``revenue-rewarding
                     scheme'' to credit the cooperative proxies according to
                     the resources they contribute. A game-theoretic model
                     is used to analyze the interactions among proxies under
                     the revenue-rewarding scheme. We propose two
                     cooperative game settings that lead to optimal
                     situations. In particular, (1) We propose a distributed
                     incentive framework for peers to participate in
                     resource contribution for media streaming; (2) Proxies
                     are encouraged to cooperate under the revenue-rewarding
                     scheme; (3) Profit and social welfare are maximized in
                     these cooperative games; and (4) Cost-effective
                     resource allocation is achieved in these cooperative
                     games. Large scale simulation is carried out to
                     validate and verify the merits of our proposed
                     incentive schemes.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "5",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "game-theoretic analysis; incentive mechanism; Nash
                     equilibrium; pricing; resource allocation",
}
@Article{Zhang:2008:AEE,
  author          = "Cha Zhang and Yong Rui and Jim Crawford and Li-Wei
                     He",
  title           = "An automated end-to-end lecture capture and
                     broadcasting system",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "6:1--6:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324293",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Remote viewing of lectures presented to a live
                     audience is becoming increasingly popular. At the same
                     time, the lectures can be recorded for subsequent
                     on-demand viewing over the Internet. Providing such
                     services, however, is often prohibitive due to the
                     labor-intensive cost of capturing and
                     pre/post-processing. This article presents a complete
                     automated end-to-end system that supports capturing,
                     broadcasting, viewing, archiving and searching of
                     presentations. Specifically, we describe a system
                     architecture that minimizes the pre- and
                     post-production time, and a fully automated lecture
                     capture system called iCam2 that synchronously captures
                     all contents of the lecture, including audio, video,
                     and presentation material. No staff is needed during
                     lecture capture and broadcasting, so the operational
                     cost of the system is negligible. The system has been
                     used on a daily basis for more than 4 years, during
                     which 522 lectures have been captured. These lectures
                     have been viewed over 20,000 times.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "6",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "automated lecture capture; lecture broadcasting;
                     live/on-demand broadcasting",
}
@Article{Nguyen:2008:OIV,
  author          = "Giang Phuong Nguyen and Marcel Worring",
  title           = "Optimization of interactive visual-similarity-based
                     search",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "7:1--7:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324294",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "At one end of the spectrum, research in interactive
                     content-based retrieval concentrates on machine
                     learning methods for effective use of relevance
                     feedback. On the other end, the information
                     visualization community focuses on effective methods
                     for conveying information to the user. What is lacking
                     is research considering the information visualization
                     and interactive retrieval as truly integrated parts of
                     one content-based search system. In such an integrated
                     system, there are many degrees of freedom like the
                     similarity function, the number of images to display,
                     the image size, different visualization modes, and
                     possible feedback modes. To base the optimal values for
                     all of those on user studies is unfeasible. We
                     therefore develop search scenarios in which tasks and
                     user actions are simulated. From there, the proposed
                     scheme is optimized based on objective constraints and
                     evaluation criteria. In such a manner, the degrees of
                     freedom are reduced and the remaining degrees can be
                     evaluated in user studies. In this article, we present
                     a system that integrates advanced similarity based
                     visualization with active learning. We have performed
                     extensive experimentation on interactive category
                     search with different image collections. The results
                     using the proposed simulation scheme show that indeed
                     the use of advanced visualization and active learning
                     pays off in all of these datasets.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "7",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "active learning; interactive search; similarity based
                     visualization",
}
@Article{Hlavacs:2008:HVP,
  author          = "Helmut Hlavacs and Shelley Buchinger",
  title           = "Hierarchical video patching with optimal server
                     bandwidth",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "1",
  pages           = "8:1--8:??",
  month           = jan,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1324287.1324295",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:06 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Video patching is a way for transporting true
                     video-on-demand, that is, instantaneous without any
                     delay, from a video server to several clients. Instead
                     of sending a unique stream to each newly arriving
                     client, clients share as many multicast transmissions
                     as possible, and are serviced only those parts of the
                     video that they have missed.\par
                     We present a novel video patching scheme using
                     hierarchies of patches. Our scheme minimizes the
                     bandwidth needed by the video server, and may result in
                     the fact that clients receive several streams in
                     parallel. We show analytically that for Poisson arrival
                     our algorithm achieves the optimal possible server
                     bandwidth for all schemes where clients share multicast
                     transmissions.\par
                     We also show, how our approach can be combined with
                     batching. This combination requires less server
                     bandwidth than all fixed start point periodic broadcast
                     algorithms.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "8",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "batching; server bandwidth; true video-on-demand;
                     video patching",
}
@Article{Chen:2008:ASD,
  author          = "Songqing Chen and Shiping Chen and Huiping Guo and Bo
                     Shen and Sushil Jajodia",
  title           = "Achieving simultaneous distribution control and
                     privacy protection for {Internet} media delivery",
  journal         = j-TOMCCAP,
  volume          = "4",
  number          = "2",
  pages           = "9:1--9:??",
  month           = may,
  year            = "2008",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1352012.1352013",
  ISSN            = "1551-6857 (print), 1551-6865 (electronic)",
  bibdate         = "Mon Jun 16 17:12:37 MDT 2008",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract        = "Massive Internet media distribution demands prolonged
                     continuous consumption of networking and disk
                     bandwidths in large capacity. Many proxy-based Internet
                     media distribution algorithms and systems have been
                     proposed, implemented, and evaluated to address the
                     scalability and performance issue. However, few of them
                     have been used in practice, since two important issues
                     are not satisfactorily addressed. First, existing
                     proxy-based media distribution architectures lack an
                     efficient media distribution control mechanism. Without
                     copyright protection, content providers are hesitant to
                     use proxy-based fast distribution techniques. Second,
                     little has been done to protect client privacy during
                     content accesses on the Internet. Straightforward
                     solutions to address these two issues independently
                     lead to conflicts. For example, to enforce distribution
                     control, only legitimate users should be granted access
                     rights. However, this normally discloses more
                     information (such as which object the client is
                     accessing) other than the client identity, which
                     conflicts with the client's desire for privacy
                     protection. In this article, we propose a unified
                     proxy-based media distribution protocol to effectively
                     address these two problems simultaneously. We further
                     design a set of new algorithms in a cooperative proxy
                     environment where our proposed scheme works efficiently
                     and practically. Simulation-based experiments are
                     conducted to extensively evaluate the proposed system.
                     Preliminary results demonstrate the effectiveness of
                     our proposed strategy.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno       = "9",
  fjournal        = "ACM Transactions on Multimedia Computing,
                     Communications, and Applications",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords        = "cooperative proxy; distribution control; media
                     delivery; privacy; proxy caching",
}
@Article{Li:2008:FSE,
  author          = {Rui Li and Bir Bhanu and Anlei Dong},
  title           = {Feature synthesized {EM} algorithm for image
                     retrieval},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {10:1--10:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352014},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {As a commonly used unsupervised learning algorithm in
                     Content-Based Image Retrieval (CBIR),
                     Expectation-Maximization (EM) algorithm has several
                     limitations, including the curse of dimensionality and
                     the convergence at a local maximum. In this article, we
                     propose a novel learning approach, namely
                     Coevolutionary Feature Synthesized
                     Expectation-Maximization (CFS-EM), to address the above
                     problems. The CFS-EM is a hybrid of coevolutionary
                     genetic programming (CGP) and EM algorithm applied on
                     partially labeled data. CFS-EM is especially suitable
                     for image retrieval because the images can be searched
                     in the synthesized low-dimensional feature space, while
                     a kernel-based method has to make classification
                     computation in the original high-dimensional space.
                     Experiments on real image databases show that CFS-EM
                     outperforms Radial Basis Function Support Vector
                     Machine (RBF-SVM), CGP, Discriminant-EM (D-EM) and
                     Transductive-SVM (TSVM) in the sense of classification
                     performance and it is computationally more efficient
                     than RBF-SVM in the query phase.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {coevolutionary feature synthesis; content-based image
                     retrieval; expectation maximization; semi-supervised
                     learning},
}
@Article{Xu:2008:AKG,
  author          = {Min Xu and Changsheng Xu and Lingyu Duan and Jesse S.
                     Jin and Suhuai Luo},
  title           = {Audio keywords generation for sports video analysis},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {11:1--11:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352015},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Sports video has attracted a global viewership.
                     Research effort in this area has been focused on
                     semantic event detection in sports video to facilitate
                     accessing and browsing. Most of the event detection
                     methods in sports video are based on visual features.
                     However, being a significant component of sports video,
                     audio may also play an important role in semantic event
                     detection. In this paper, we have borrowed the concept
                     of the ``keyword'' from the text mining domain to
                     define a set of specific audio sounds. These specific
                     audio sounds refer to a set of game-specific sounds
                     with strong relationships to the actions of players,
                     referees, commentators, and audience, which are the
                     reference points for interesting sports events. Unlike
                     low-level features, audio keywords can be considered as
                     a mid-level representation, able to facilitate
                     high-level analysis from the semantic concept point of
                     view. Audio keywords are created from low-level audio
                     features with learning by support vector machines. With
                     the help of video shots, the created audio keywords can
                     be used to detect semantic events in sports video by
                     Hidden Markov Model (HMM) learning. Experiments on
                     creating audio keywords and, subsequently, event
                     detection based on audio keywords have been very
                     encouraging. Based on the experimental results, we
                     believe that the audio keyword is an effective
                     representation that is able to achieve satisfying
                     results for event detection in sports video.
                     Application in three sports types demonstrates the
                     practicality of the proposed method.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {audio keywords; event detection; semantics analysis;
                     sports video analysis; support vector machines},
}
@Article{Tullimas:2008:MSU,
  author          = {Sunand Tullimas and Thinh Nguyen and Rich Edgecomb and
                     Sen-ching Cheung},
  title           = {Multimedia streaming using multiple {TCP}
                     connections},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {12:1--12:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352016},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In recent years, multimedia applications over the
                     Internet become increasingly popular. However, packet
                     loss, delay, and time-varying bandwidth of the Internet
                     have remained the major problems for multimedia
                     streaming applications. As such, a number of
                     approaches, including network infrastructure and
                     protocol, source and channel coding, have been proposed
                     to either overcome or alleviate these drawbacks of the
                     Internet. In this article, we propose the MultiTCP
                     system, a receiver-driven, TCP-based system for
                     multimedia streaming over the Internet. Our proposed
                     algorithm aims at providing resilience against short
                     term insufficient bandwidth by using multiple TCP
                     connections for the same application. Our proposed
                     system enables the application to achieve and control
                     the desired sending rate during congested periods,
                     which cannot be achieved using traditional TCP.
                     Finally, our proposed system is implemented at the
                     application layer, and hence, no kernel modification to
                     TCP is necessary. We analyze the proposed system, and
                     present simulation and experimental results to
                     demonstrate its advantages over the traditional
                     single-TCP-based approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {multimedia streaming},
}
@Article{Tjondronegoro:2008:SES,
  author          = {Dian Tjondronegoro and Yi-Ping Phoebe Chen and Adrien
                     Joly},
  title           = {A scalable and extensible segment-event-object-based
                     sports video retrieval system},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {13:1--13:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352017},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Sport video data is growing rapidly as a result of the
                     maturing digital technologies that support digital
                     video capture, faster data processing, and large
                     storage. However, (1) semi-automatic content extraction
                     and annotation, (2) scalable indexing model, and (3)
                     effective retrieval and browsing, still pose the most
                     challenging problems for maximizing the usage of large
                     video databases. This article will present the findings
                     from a comprehensive work that proposes a scalable and
                     extensible sports video retrieval system with two major
                     contributions in the area of sports video indexing and
                     retrieval. The first contribution is a new sports video
                     indexing model that utilizes semi-schema-based indexing
                     scheme on top of an Object-Relationship approach. This
                     indexing model is scalable and extensible as it enables
                     gradual index construction which is supported by
                     ongoing development of future content extraction
                     algorithms. The second contribution is a set of novel
                     queries which are based on XQuery to generate dynamic
                     and user-oriented summaries and event structures. The
                     proposed sports video retrieval system has been fully
                     implemented and populated with soccer, tennis,
                     swimming, and diving video. The system has been
                     evaluated against 20 users to demonstrate and confirm
                     its feasibility and benefits. The experimental sports
                     genres were specifically selected to represent the four
                     main categories of sports domain: period-, set-point-,
                     time (race)-, and performance-based sports. Thus, the
                     proposed system should be generic and robust for all
                     types of sports.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {automatic content extraction; indexing; mobile video
                     interaction; MPEG-7; sports video retrieval; video
                     database system; XML; XQuery},
}
@Article{Zimmermann:2008:DMP,
  author          = {Roger Zimmermann and Elaine Chew and Sakire Arslan Ay
                     and Moses Pawar},
  title           = {Distributed musical performances: {Architecture} and
                     stream management},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {14:1--14:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352018},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {An increasing number of novel applications produce a
                     rich set of different data types that need to be
                     managed efficiently and coherently. In this article we
                     present our experience with designing and implementing
                     a data management infrastructure for a distributed
                     immersive performance (DIP) application. The DIP
                     project investigates a versatile framework for the
                     capture, recording, and replay of video, audio, and
                     MIDI (Musical Instrument Digital Interface) streams in
                     an interactive environment for collaborative music
                     performance. We are focusing on two classes of data
                     streams that are generated within this environment. The
                     first category consists of high-resolution isochronous
                     media streams, namely audio and video. The second class
                     comprises MIDI data produced by electronic instruments.
                     MIDI event sequences are alphanumeric in nature and
                     fall into the category of the data streams that have
                     been of interest to data management researchers in
                     recent years.\par
                     We present our data management architecture, which
                     provides a repository for all DIP data. Streams of both
                     categories need to be acquired, transmitted, stored,
                     and replayed in real time. Data items are correlated
                     across different streams with temporal indices. The
                     audio and video streams are managed in our own
                     High-performance Data Recording Architecture (HYDRA),
                     which integrates multistream recording and retrieval in
                     a consistent manner. This paper reports on the
                     practical issues and challenges that we encountered
                     during the design, implementation and experimental
                     phases of our prototype. We also present some analysis
                     results and discuss future extensions for the
                     architecture.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {distributed immersive performance; multimedia storage;
                     multimodal data recorder; networked musical
                     performance},
}
@Article{Hsu:2008:ACR,
  author          = {Cheng-Hsin Hsu and Mohamed Hefeeda},
  title           = {On the accuracy and complexity of rate-distortion
                     models for fine-grained scalable video sequences},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {2},
  pages           = {15:1--15:??},
  month           = may,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1352012.1352019},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Mon Jun 16 17:12:37 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Rate-distortion (R-D) models are functions that
                     describe the relationship between the bitrate and
                     expected level of distortion in the reconstructed video
                     stream. R-D models enable optimization of the received
                     video quality in different network conditions. Several
                     R-D models have been proposed for the increasingly
                     popular fine-grained scalable video sequences. However,
                     the models' relative performance has not been
                     thoroughly analyzed. Moreover, the time complexity of
                     each model is not known, nor is the range of bitrates
                     in which the model produces valid results. This lack of
                     quantitative performance analysis makes it difficult to
                     select the model that best suits a target streaming
                     system. In this article, we classify, analyze, and
                     rigorously evaluate all R-D models proposed for FGS
                     coders in the literature. We classify R-D models into
                     three categories: analytic, empirical, and
                     semi-analytic. We describe the characteristics of each
                     category. We analyze the R-D models by following their
                     mathematical derivations, scrutinizing the assumptions
                     made, and explaining when the assumptions fail and why.
                     In addition, we implement all R-D models, a total of
                     eight, and evaluate them using a diverse set of video
                     sequences. In our evaluation, we consider various
                     source characteristics, diverse channel conditions,
                     different encoding/decoding parameters, different frame
                     types, and several performance metrics including
                     accuracy, range of applicability, and time complexity
                     of each model. We also present clear systematic ways
                     (pseudo codes) for constructing various R-D models from
                     a given video sequence. Based on our experimental
                     results, we present a justified list of recommendations
                     on selecting the best R-D models for video-on-demand,
                     video conferencing, real-time, and peer-to-peer
                     streaming systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {fine-grained scalable coding; multimedia streaming;
                     rate-distortion models},
}
@Article{Wang:2008:MST,
author = "Bing Wang and Jim Kurose and Prashant Shenoy and Don
Towsley",
title = "Multimedia streaming via {TCP}: {An} analytic
performance study",
journal = j-TOMCCAP,
volume = "4",
number = "2",
pages = "16:1--16:??",
month = may,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1352012.1352020",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Mon Jun 16 17:12:37 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "TCP is widely used in commercial multimedia streaming
systems, with recent measurement studies indicating
that a significant fraction of Internet streaming media
is currently delivered over HTTP/TCP. These
observations motivate us to develop analytic
performance models to systematically investigate the
performance of TCP for both live and stored-media
streaming. We validate our models via ns simulations
and experiments conducted over the Internet. Our models
provide guidelines indicating the circumstances under
which TCP streaming leads to satisfactory performance,
showing, for example, that TCP generally provides good
streaming performance when the achievable TCP
throughput is roughly twice the media bitrate, with
only a few seconds of startup delay.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "16",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "multimedia streaming; performance modeling",
}
@Article{Lin:2008:NNB,
author = "Tsungnan Lin and Chiapin Wang and Po-Chiang Lin",
title = "A neural-network-based context-aware handoff algorithm
for multimedia computing",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "17:1--17:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386110",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The access of multimedia computing in wireless
networks is concerned with the performance of handoff
because of the irretrievable property of real-time data
delivery. To lessen throughput degradation incurred by
unnecessary handoffs or handoff latencies leading to
media disruption perceived by users, this paper
presents a link quality based handoff algorithm. Neural
networks are used to learn the cross-layer correlation
between the link quality estimator such as packet
success rate and the corresponding context metric
indicators, for example, the transmitting packet
length, received signal strength, and signal to noise
ratio. Based on a pre-processed learning of link
quality profile, neural networks make essential handoff
decisions efficiently with the evaluations of link
quality instead of the comparisons between relative
signal strength. The experiment and simulation results
show that the proposed algorithm improves the user
perceived qualities in a transmission scenario of VoIP
applications by minimizing both the number of lost
packets and unnecessary handoffs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "17",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "context-aware; handoff; multimedia computing; neural
networks",
}
@Article{Franke:2008:TAC,
author = "Ingmar S. Franke and Sebastian Pannasch and Jens R.
Helmert and Robert Rieger and Rainer Groh and Boris M.
Velichkovsky",
title = "Towards attention-centered interfaces: {An} aesthetic
evaluation of perspective with eye tracking",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "18:1--18:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386111",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The established method of representing
three-dimensional space on a two-dimensional surface
involves camera based, point of regard systems,
comparable in design to the early ``camera obscura''.
However, geometrical limitations of such models lead to
distortions of perspective when projected. This
research investigated the influence of single- versus
multi-perspectives on aesthetic choices within one
image. A clear perceptual bias towards
multi-perspective images was found, additionally
supported by an eye tracking study. We propose that
human users are more attracted by multi-perspective
images, which emphasize the ``semantic foci'' of the
scene, than by those being synthesized statically with
only one geometrical prospect.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "18",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "eye tracking; perspective projection; scene
perception; subjective evaluation",
}
@Article{Wu:2008:ELS,
author = "Chuan Wu and Baochun Li and Shuqiao Zhao",
title = "Exploring large-scale peer-to-peer live streaming
topologies",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "19:1--19:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386112",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Real-world live peer-to-peer (P2P) streaming
applications have been successfully deployed in the
Internet, delivering live multimedia content to
millions of users at any given time. With relative
simplicity in design with respect to peer selection and
topology construction protocols and without much
algorithmic sophistication, current-generation live P2P
streaming applications are able to provide users with
adequately satisfying viewing experiences. That said,
little existing research has provided sufficient
insights on the time-varying internal characteristics
of peer-to-peer topologies in live streaming. This
article presents {\em Magellan}, our collaborative work
with UUSee Inc., Beijing, China, for exploring and
charting graph theoretical properties of practical P2P
streaming topologies, gaining important insights in
their topological dynamics over a long period of
time.\par
With more than 120 GB worth of traces starting
September 2006 from a commercially deployed P2P live
streaming system that represents UUSee's core product,
we have completed a thorough and in-depth investigation
of the topological properties in large-scale live P2P
streaming, as well as their evolutionary behavior over
time, for example, at different times of the day and in
flash crowd scenarios. We seek to explore real-world
P2P streaming topologies with respect to their graph
theoretical metrics, such as the degree, clustering
coefficient, and reciprocity. In addition, we compare
our findings with results from existing studies on
topological properties of P2P file sharing
applications, and present new and unique observations
specific to streaming. We have observed that live P2P
streaming sessions demonstrate excellent scalability, a
high level of reciprocity, a clustering phenomenon in
each ISP, and a degree distribution that does {\em
not\/} follow the power-law distribution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "19",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "peer-to-peer streaming; topology characterization",
}
@Article{Goel:2008:LLA,
  author          = {Ashvin Goel and Charles Krasic and Jonathan Walpole},
  title           = {Low-latency adaptive streaming over {TCP}},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {3},
  pages           = {20:1--20:??},
  month           = aug,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1386109.1386113},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:12 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Media streaming over TCP has become increasingly
                     popular because TCP's congestion control provides
                     remarkable stability to the Internet. Streaming over
                     TCP requires adapting to bandwidth availability, but
                     unfortunately, TCP can introduce significant latency at
                     the application level, which causes unresponsive and
                     poor adaptation. This article shows that this latency
                     is not inherent in TCP but occurs as a result of
                     throughput-optimized TCP implementations. We show that
                     this latency can be minimized by dynamically tuning
                     TCP's send buffer. Our evaluation shows that this
                     approach leads to better application-level adaptation
                     and it allows supporting interactive and other
                     low-latency applications over TCP.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {low latency streaming; multimedia applications; TCP},
}
@Article{Lim:2008:DPP,
author = "Seung-Ho Lim and Yo-Won Jeong and Kyu Ho Park",
title = "Data placement and prefetching with accurate bit rate
control for interactive media server",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "21:1--21:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386114",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "An interactive Media Server should support
unrestricted control to viewers with their service
level agreements. It is important to manage video data
effectively to facilitate efficient retrieval. In this
paper, we propose an efficient placement algorithm as
part of an effective retrieval scheme to increase the
number of clients who can be provided with interactive
service. The proposed management schemes are
incorporated with a bit count control method that is
based on repeated tuning of quantization parameters to
adjust the actual bit count to the target bit count.
The encoder using this method can generate coded frames
whose sizes are synchronized with the RAID stripe size,
so that when various fast-forward levels are accessed
we can reduce the seek and rotational latency and
enhance the disk throughput of each disk in the RAID
system. Experimental results demonstrate that the
proposed schemes can significantly improve the average
service time and guarantee more users service of
quality, and the interactive media server can thereby
efficiently service a large number of clients.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "21",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "bit count control; disk array; interactive media
server; stripe size; video rate",
}
@Article{Jie:2008:VGD,
author = "Li Jie and James J. Clark",
title = "Video game design using an eye-movement-dependent
model of visual attention",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "22:1--22:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386115",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Eye movements can be used to infer the allocation of
covert attention. In this article, we propose to model
the allocation of attention in a task-dependent manner
based on different eye movement conditions,
specifically fixation and pursuit. We show that the
image complexity at eye fixation points during
fixation, and the pursuit direction during pursuit are
significant factors in attention allocation. Results of
the study are applied to the design of an interactive
computer game. Real-time eye movement information is
taken as one of inputs for the game. The utility of
such eye information for controlling game difficulty is
shown.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "22",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "entertainment; eye movements; eye tracking; HCI; video
games; visual attention",
}
@Article{Komogortsev:2008:PRT,
author = "Oleg V. Komogortsev and Javed I. Khan",
title = "Predictive real-time perceptual compression based on
eye-gaze-position analysis",
journal = j-TOMCCAP,
volume = "4",
number = "3",
pages = "23:1--23:??",
month = aug,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1386109.1386116",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article designs a real-time perceptual
compression system (RTPCS) based on eye-gaze-position
analysis. Our results indicate that the
eye-gaze-position containment metric provides more
efficient and effective evaluation of an RTPCS than the
eye fixation containment. The presented RTPCS is
designed for a network communication scenario with a
feedback loop delay. The proposed RTPCS uses human
visual system properties to compensate for the delay
and to provide high ratios of multimedia compression.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "23",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "human visual system; real-time multimedia
compression",
}
@Article{Cesar:2008:ISI,
author = "Pablo Cesar and Dick C. A. Bulterman and Luiz Fernando
Gomes Soares",
title = "Introduction to special issue: {Human-centered}
television --- directions in interactive digital
television research",
journal = j-TOMCCAP,
volume = "4",
number = "4",
pages = "24:1--24:??",
month = oct,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1412196.1412197",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:51:32 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The research area of interactive digital TV is in the
midst of a significant revival. Unlike the first
generation of digital TV, which focused on producer
concerns that effectively limited (re)distribution, the
current generation of research is closely linked to the
role of the user in selecting, producing, and
distributing content. The research field of interactive
digital television is being transformed into a study of
human-centered television. Our guest editorial reviews
relevant aspects of this transformation in the three
main stages of the content lifecycle: content
production, content delivery, and content consumption.
While past research on content production tools focused
on full-fledged authoring tools for professional
editors, current research studies lightweight, often
informal end-user authoring systems. In terms of
content delivery, user-oriented infrastructures such as
peer-to-peer are being seen as alternatives to more
traditional broadcast solutions. Moreover, end-user
interaction is no longer limited to content selection,
but now facilitates nonlinear participatory television
productions. Finally, user-to-user communication
technologies have allowed television to become a
central component of an interconnected social
experience. The background context given in this
article provides a framework for appreciating the
significance of four detailed contributions that
highlight important directions in transforming
interactive television research.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "interactive television; shared experiences; standards;
survey",
}
@article{Ursu:2008:ITN,
  author          = {Marian F. Ursu and Maureen Thomas and Ian Kegel and
                     Doug Williams and Mika Tuomola and Inger Lindstedt and
                     Terence Wright and Andra Leurdijk and Vilmos Zsombori
                     and Julia Sussner and Ulf Myrestam and Nina Hall},
  title           = {Interactive {TV} narratives: {Opportunities},
                     progress, and challenges},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {4},
  pages           = {25:1--25:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1412196.1412198},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article is motivated by the question whether
                     television should do more than simply offer interactive
                     services alongside (and separately from) traditional
                     linear programs, in the context of its dominance being
                     seriously challenged and threatened by interactive
                     forms of screen media entertainment. It suggests: yes.
                     Interactive {\em narrativity}, that is, the ability to
                     interact with (and influence) stories whilst they are
                     being told, represents one clear development path for
                     interactive television. The capabilities of computing
                     technology are ripe for exploring this new form of
                     storytelling, from creation to commercial distribution.
                     The article starts by looking at the relationship
                     between narrativity and interactivity in the current
                     context of screen media, and identifies clear signs of
                     interest from certain European public broadcasters in
                     interactive TV narratives. It then presents in detail
                     four recent experimental interactive TV productions in
                     the genres of drama, news, and documentary, developed
                     in collaboration with public broadcasters, which
                     illustrate the potential and richness of this new form
                     of storytelling, but also highlight new technological
                     capabilities necessary for such productions. A number
                     of essential technological requirements are then
                     discussed in more detail in the final part. The article
                     suggests that the ShapeShifting Media Technology,
                     employed in the implementation of the four productions,
                     has made significant advances both at the technological
                     and the creative ends in supporting the development of
                     interactive TV narrativity, but, however, that further
                     developments are required before being able to answer
                     questions such as ``Would end users want such a form of
                     screen media entertainment?'' and ``Would it be
                     effective for both end users and producers?''},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {computational narrativity; digital storytelling;
                     entertainment; Interactive; media; narrativity;
                     nonlinear; screen media; shapeshifting; television},
}
@article{Cheng:2008:GIP,
  author          = {Bin Cheng and Lex Stein and Hai Jin and Xiaofei Liao
                     and Zheng Zhang},
  title           = {{GridCast}: {Improving} peer sharing for {P2P VoD}},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {4},
  pages           = {26:1--26:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1412196.1412199},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Video-on-Demand (VoD) is a compelling application, but
                     costly. VoD is costly due to the load it places on
                     video source servers. Many have proposed using
                     peer-to-peer (P2P) techniques to shift load from
                     servers to peers. Yet, nobody has implemented and
                     deployed a system to openly and systematically evaluate
                     how these techniques work.\par
                     This article describes the design, implementation and
                     evaluation of GridCast, a real deployed P2P VoD system.
                     GridCast has been live on CERNET since May of 2006. It
                     provides seek, pause, and play operations, and employs
                     peer sharing to improve system scalability. In peak
                     months, GridCast has served videos to 23,000 unique
                     users. From the first deployment, we have gathered
                     information to understand the system and evaluate how
                     to further improve peer sharing through caching and
                     replication.\par
                     We first show that GridCast with single video caching
                     (SVC) can decrease load on source servers by an average
                     of 22\% from a client-server architecture. We analyze
                     the net effect on system resources and determine that
                     peer upload is largely idle. This leads us to changing
                     the caching algorithm to cache multiple videos (MVC).
                     MVC decreases source load by an average of 51\% over
                     the client-server. The improvement is greater as user
                     load increases. This bodes well for peer-assistance at
                     larger scales.\par
                     A detailed analysis of MVC shows that departure misses
                     become a major issue in a P2P VoD system with caching
                     optimization. Motivated by this observation, we examine
                     how to use replication to eliminate departure misses
                     and further reduce server load. A framework for lazy
                     replication is presented and evaluated in this article.
                     In this framework, two predictors are plugged in to
                     create the working replication algorithm. With these
                     two simple predictors, lazy replication can decrease
                     server load by 15\% from MVC with only a minor increase
                     in network traffic.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {caching; peer-to-peer; replication; Video-on-demand},
}
@article{Metcalf:2008:EPL,
  author          = {Crysta Metcalf and Gunnar Harboe and Joe Tullio and
                     Noel Massey and Guy Romano and Elaine M. Huang and
                     Frank Bentley},
  title           = {Examining presence and lightweight messaging in a
                     social television experience},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {4},
  pages           = {27:1--27:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1412196.1412200},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {We report on a field evaluation of a prototype social
                     television system (Social TV) that incorporates
                     lightweight messaging as well as ambient awareness of
                     user presence on the system. This evaluation was
                     conducted over a two-week period and involved the
                     participation of ten households. Participants
                     appreciated the ability to see their buddies' presence
                     on the system, the ability to see or suggest the
                     programs they were currently watching, and the ability
                     to send short messages to one another. The presence
                     facilities available in Social TV also allowed
                     participants to learn more about one another's TV
                     viewing habits and preferences, and fostered a sense of
                     connectedness between them. However, they also felt
                     constrained by the limitations of the communication
                     options available to them and demanded free-form text
                     or voice chat to be able to fully express themselves.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {ambient displays; awareness displays;
                     computer-mediated communication; Social television},
}
@article{Cattelan:2008:WCP,
  author          = {Renan G. Cattelan and Cesar Teixeira and Rudinei
                     Goularte and Maria Da Gra{\c{c}}a C. Pimentel},
  title           = {Watch-and-comment as a paradigm toward ubiquitous
                     interactive video editing},
  journal         = j-TOMCCAP,
  volume          = {4},
  number          = {4},
  pages           = {28:1--28:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1412196.1412201},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:32 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {The literature reports research efforts allowing the
                     editing of interactive TV multimedia documents by
                     end-users. In this article we propose complementary
                     contributions relative to end-user generated
                     interactive video, video tagging, and collaboration. In
                     earlier work we proposed the {\em watch-and-comment\/}
                     (WaC) paradigm as the seamless capture of an
                     individual's comments so that corresponding annotated
                     interactive videos be automatically generated. As a
                     proof of concept, we implemented a prototype
                     application, the WaCTool, that supports the capture of
                     digital ink and voice comments over individual frames
                     and segments of the video, producing a declarative
                     document that specifies both: different media stream
                     structure and synchronization.\par
                     In this article, we extend the WaC paradigm in two
                     ways. First, user-video interactions are associated
                     with edit commands and digital ink operations. Second,
                     focusing on collaboration and distribution issues, we
                     employ annotations as simple containers for context
                     information by using them as tags in order to organize,
                     store and distribute information in a P2P-based
                     multimedia capture platform. We highlight the design
                     principles of the watch-and-comment paradigm, and
                     demonstrate related results including the current
                     version of the WaCTool and its architecture. We also
                     illustrate how an interactive video produced by the
                     WaCTool can be rendered in an interactive video
                     environment, the Ginga-NCL player, and include results
                     from a preliminary evaluation.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Annotation; Ginga-NCL; interactive digital video; P2P
                     collaboration},
}
@article{Bailey:2008:SSA,
  author          = {Brian P. Bailey and Nicu Sebe and Alan Hanjalic},
  title           = {Special section from the {ACM Multimedia Conference
                     2007}},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {1:1--1:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404881},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {1},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@article{Gleicher:2008:RCI,
  author          = {Michael L. Gleicher and Feng Liu},
  title           = {Re-cinematography: {Improving} the camerawork of
                     casual video},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {2:1--2:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404882},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article presents an approach to postprocessing
                     casually captured videos to improve apparent camera
                     movement. {\em Re-cinematography\/} transforms each
                     frame of a video such that the video better follows
                     cinematic conventions. The approach breaks a video into
                     shorter segments. Segments of the source video where
                     there is no intentional camera movement are made to
                     appear as if the camera is completely static. For
                     segments with camera motions, camera paths are
                     keyframed automatically and interpolated with matrix
                     logarithms to give velocity-profiled movements that
                     appear intentional and directed. Closeups are inserted
                     to provide compositional variety in otherwise uniform
                     segments. The approach automatically balances the
                     tradeoff between motion smoothness and distortion to
                     the original imagery. Results from our prototype show
                     improvements to poor quality home videos.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {2},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {casual video; cinematography; Image stabilization},
}
@article{Qi:2008:CMV,
  author          = {Guo-Jun Qi and Xian-Sheng Hua and Yong Rui and Jinhui
                     Tang and Tao Mei and Meng Wang and Hong-Jiang Zhang},
  title           = {Correlative multilabel video annotation with temporal
                     kernels},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {3:1--3:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404883},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Automatic video annotation is an important ingredient
                     for semantic-level video browsing, search and
                     navigation. Much attention has been paid to this topic
                     in recent years. These researches have evolved through
                     two paradigms. In the first paradigm, each concept is
                     individually annotated by a pre-trained binary
                     classifier. However, this method ignores the rich
                     information between the video concepts and only
                     achieves limited success. Evolved from the first
                     paradigm, the methods in the second paradigm add an
                     extra step on the top of the first individual
                     classifiers to fuse the multiple detections of the
                     concepts. However, the performance of these methods can
                     be degraded by the error propagation incurred in the
                     first step to the second fusion one. In this article,
                     another paradigm of the video annotation method is
                     proposed to address these problems. It simultaneously
                     annotates the concepts as well as model correlations
                     between them in one step by the proposed {\em
                     Correlative Multilabel\/} (CML) method, which benefits
                     from the compensation of complementary information
                     between different labels. Furthermore, since the video
                     clips are composed by temporally ordered frame
                     sequences, we extend the proposed method to exploit the
                     rich temporal information in the videos. Specifically,
                     a temporal-kernel is incorporated into the CML method
                     based on the discriminative information between {\em
                     Hidden Markov Models\/} (HMMs) that are learned from
                     the videos. We compare the performance between the
                     proposed approach and the state-of-the-art approaches
                     in the first and second paradigms on the widely used
                     TRECVID data set. As to be shown, superior performance
                     of the proposed method is gained.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {3},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {concept correlation; multilabeling; temporal kernel;
                     Video annotation},
}
@article{Chen:2008:DDN,
  author          = {Yinpeng Chen and Weiwei Xu and Hari Sundaram and
                     Thanassis Rikakis and Sheng-Min Liu},
  title           = {A dynamic decision network framework for online media
                     adaptation in stroke rehabilitation},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {4:1--4:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404884},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In this article, we present a media adaptation
                     framework for an immersive biofeedback system for
                     stroke patient rehabilitation. In our biofeedback
                     system, media adaptation refers to changes in
                     audio/visual feedback as well as changes in physical
                     environment. Effective media adaptation frameworks help
                     patients recover generative plans for arm movement with
                     potential for significantly shortened therapeutic time.
                     The media adaptation problem has significant challenges
                     --- (a) high dimensionality of adaptation parameter
                     space; (b) variability in the patient performance
                     across and within sessions; (c) the actual
                     rehabilitation plan is typically a non-first-order
                     Markov process, making the learning task hard.\par
                     Our key insight is to understand media adaptation as a
                     real-time feedback control problem. We use a
                     mixture-of-experts based Dynamic Decision Network (DDN)
                     for online media adaptation. We train DDN mixtures per
                     patient, per session. The mixture models address two
                     basic questions --- (a) given a specific adaptation
                     suggested by the domain experts, predict the patient
                     performance, and (b) given the expected performance,
                     determine the optimal adaptation decision. The
                     questions are answered through an optimality criterion
                     based search on DDN models trained in previous
                     sessions. We have also developed new validation metrics
                     and have very good results for both questions on actual
                     stroke rehabilitation data.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {4},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Biofeedback; dynamic decision network; media
                     adaptation; mixture of experts},
}
@article{Thouin:2008:EAV,
  author          = {Frederic Thouin and Mark Coates},
  title           = {Equipment allocation in video-on-demand network
                     deployments},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {5:1--5:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404885},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Video-on-Demand (VoD) services are very user-friendly,
                     but also complex and resource demanding. Deployments
                     involve careful design of many mechanisms where content
                     attributes and usage models should be taken into
                     account. We define, and propose a methodology to solve,
                     the {\em VoD Equipment Allocation Problem\/} of
                     determining the number and type of streaming servers
                     with directly attached storage (VoD servers) to install
                     at each potential location in a metropolitan area
                     network topology such that deployment costs are
                     minimized. We develop a cost model for VoD deployments
                     based on streaming, storage and transport costs and
                     train a parametric function that maps the amount of
                     available storage to a worst-case hit ratio. We observe
                     the impact of having to determine the amount of storage
                     and streaming cojointly, and determine the minimum
                     demand required to deploy replicas as well as the
                     average hit ratio at each location. We observe that
                     common video-on-demand server configurations lead to
                     the installation of excessive storage, because a
                     relatively high hit-ratio can be achieved with small
                     amounts of storage so streaming requirements
                     dominate.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {5},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {equipment allocation; optimization; resource
                     allocation; Video-on-demand},
}
@article{Kolan:2008:NLV,
  author          = {Prakash Kolan and Ram Dantu and Jo{\~a}o W. Cangussu},
  title           = {Nuisance level of a voice call},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {6:1--6:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404886},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {In our everyday life, we communicate with many people
                     such as family, friends, neighbors, and colleagues. We
                     communicate with them using different communication
                     media such as email, telephone calls, and face-to-face
                     interactions. While email is not real-time and
                     face-to-face communications require geographic
                     proximity, voice and video communications are preferred
                     over other modes of communication. However, real-time
                     voice/video calls may create nuisance to the receiver.
                     In this article, we describe a mathematical model for
                     computing nuisance level of incoming voice/video calls.
                     We computed the closeness and nuisance level using the
                     calling patterns between the caller and the callee. To
                     validate the nuisance model, we collected cell phone
                     call records of real-life people at our university and
                     computed the nuisance value for all voice calls. We
                     validated the nuisance levels using the feedback from
                     those real-life people. Such a nuisance model is useful
                     for predicting unwanted voice and video sessions in an
                     IP communication network.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {6},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {behavior; Multimedia communications; nuisance;
                     presence; security; tolerance; unwantedness},
}
@article{Zheng:2008:CVP,
  author          = {Qing-Fang Zheng and Wen Gao},
  title           = {Constructing visual phrases for effective and
                     efficient object-based image retrieval},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {7:1--7:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404887},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {The explosion of multimedia data necessitates
                     effective and efficient ways for us to get access to
                     our desired ones. In this article, we draw an analogy
                     between image retrieval and text retrieval and propose
                     a visual phrase-based approach to retrieve images
                     containing desired objects (object-based image
                     retrieval). The visual phrase is defined as a pair of
                     frequently co-occurred adjacent local image patches and
                     is constructed using data mining. We design methods on
                     how to construct visual phrase and how to index/search
                     images based on visual phrase. We demonstrate
                     experiments to show our visual phrase-based approach
                     can be very efficient and more effective than current
                     visual word-based approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {7},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Content-based image retrieval; inverted index; local
                     image descriptor; object-based image retrieval; SIFT;
                     visual phrase},
}
@article{Gill:2008:SDM,
  author          = {Phillipa Gill and Liqi Shi and Anirban Mahanti and
                     Zongpeng Li and Derek L. Eager},
  title           = {Scalable on-demand media streaming for heterogeneous
                     clients},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {8:1--8:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404888},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Periodic broadcast protocols enable efficient
                     streaming of highly popular media files to large
                     numbers of concurrent clients. Most previous periodic
                     broadcast protocols, however, assume that all clients
                     can receive at the same rate, and also assume that
                     reception bandwidth is not time-varying. In this
                     article, we first develop a new periodic broadcast
                     protocol, Optimized Heterogeneous Periodic Broadcast
                     (OHPB), that can be optimized for a given population of
                     clients with heterogeneous reception bandwidths and
                     quality-of-service requirements. The OHPB protocol
                     utilizes an optimized segment size progression
                     determined by solving a linear optimization model that
                     takes as input the client population characteristics
                     and an objective function such as mean client startup
                     delay. We then develop a generalization of the OHPB
                     linear optimization model that allows optimal server
                     bandwidth allocation among multiple concurrent OHPB
                     broadcasts, wherein each media file and its clients may
                     have different characteristics. Finally, we propose
                     complementary client protocols employing work-ahead
                     buffering of data during playback, so as to enable more
                     uniform playback quality when the reception bandwidth
                     is time-varying.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {8},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {linear programming; periodic broadcasts;
                     quality-of-service; Scalable streaming},
}
@article{Jung:2008:SSL,
  author          = {Dawoon Jung and Jaegeuk Kim and Jin-Soo Kim and
                     Joonwon Lee},
  title           = {{ScaleFFS}: a scalable log-structured flash file
                     system for mobile multimedia systems},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {1},
  pages           = {9:1--9:??},
  month           = oct,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1404880.1404889},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:51:49 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {NAND flash memory has become one of the most popular
                     storage media for mobile multimedia systems. A key
                     issue in designing storage systems for mobile
                     multimedia systems is handling large-capacity storage
                     media and numerous large files with limited resources
                     such as memory. However, existing flash file systems,
                     including JFFS2 and YAFFS in particular, exhibit many
                     limitations in addressing the storage capacity of
                     mobile multimedia systems.\par
                     In this article, we design and implement a scalable
                     flash file system, called ScaleFFS, for mobile
                     multimedia systems. ScaleFFS is designed to require
                     only a small fixed amount of memory space and to
                     provide fast mount time, even if the file system size
                     grows to more than tens of gigabytes. The measurement
                     results show that ScaleFFS can be instantly mounted
                     regardless of the file system size, while achieving the
                     same write bandwidth and up to 22\% higher read
                     bandwidth compared to JFFS2.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {9},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {File system; flash memory; NAND; storage system},
}
@article{Moncrieff:2008:DPA,
  author          = {Simon Moncrieff and Svetha Venkatesh and Geoff West},
  title           = {Dynamic privacy assessment in a smart house
                     environment using multimodal sensing},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {2},
  pages           = {10:1--10:??},
  month           = nov,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1413862.1413863},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:17 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Surveillance applications in private environments such
                     as smart houses require a privacy management policy if
                     such systems are to be accepted by the occupants of the
                     environment. This is due to the invasive nature of
                     surveillance, and the private nature of the home. In
                     this article, we propose a framework for dynamically
                     altering the privacy policy applied to the monitoring
                     of a smart house based on the situation within the
                     environment. Initially the situation, or context,
                     within the environment is determined; we identify
                     several factors for determining environmental context,
                     and propose methods to quantify the context using audio
                     and binary sensor data. The context is then mapped to
                     an appropriate privacy policy, which is implemented by
                     applying data hiding techniques to control access to
                     data gathered from various information sources. The
                     significance of this work lies in the examination of
                     privacy issues related to assisted-living smart house
                     environments. A single privacy policy in such
                     applications would be either too restrictive for an
                     observer, for example, a carer, or too invasive for the
                     occupants. We address this by proposing a dynamic
                     method, with the aim of decreasing the invasiveness of
                     the technology, while retaining the purpose of the
                     system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Assisted living; audio; context aware; privacy;
                     surveillance and monitoring},
}
@article{Adams:2008:SUS,
  author          = {Brett Adams and Dinh Phung and Svetha Venkatesh},
  title           = {Sensing and using social context},
  journal         = j-TOMCCAP,
  volume          = {5},
  number          = {2},
  pages           = {11:1--11:??},
  month           = nov,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1413862.1413864},
  issn            = {1551-6857 (print), 1551-6865 (electronic)},
  bibdate         = {Tue Mar 16 18:52:17 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {We present online algorithms to extract social
                     context: Social spheres are labeled locations of
                     significance, represented as convex hulls extracted
                     from GPS traces. Colocation is determined from
                     Bluetooth and GPS to extract social rhythms, patterns
                     in time, duration, place, and people corresponding to
                     real-world activities. Social ties are formulated from
                     proximity and shared spheres and rhythms. Quantitative
                     evaluation is performed for 10+ million samples over 45
                     man-months. Applications are presented with assessment
                     of perceived utility: {\em Socio-Graph}, a video and
                     photo browser with filters for social metadata, and
                     {\em Jive}, a blog browser that uses rhythms to
                     discover similarity between entries automatically.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
  keywords        = {Multimedia browsing; social context},
}
@Article{Mohanty:2008:IWB,
author = "Saraju P. Mohanty and Bharat K. Bhargava",
title = "Invisible watermarking based on creation and robust
insertion-extraction of image adaptive watermarks",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "12:1--12:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413865",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents a novel invisible robust
watermarking scheme for embedding and extracting a
digital watermark in an image. The novelty lies in
determining a perceptually important subimage in the
host image. Invisible insertion of the watermark is
performed in the most significant region of the host
image such that tampering of that portion with an
intention to remove or destroy will degrade the
esthetic quality and value of the image. One feature of
the algorithm is that this subimage is used as a region
of interest for the watermarking process and eliminates
the chance of watermark removal. Another feature of the
algorithm is the creation of a compound watermark using
the input user watermark (logo) and attributes of the
host image. This facilitates the homogeneous fusion of
a watermark with the cover image, preserves the quality
of the host image, and allows robust
insertion-extraction. Watermark creation consists of
two distinct phases. During the first phase, a
statistical image is synthesized from a perceptually
important subimage of the image. A compound watermark
is created by embedding a watermark (logo) into the
statistical synthetic image by using a visible
watermarking technique. This compound watermark is
invisibly embedded into the important block of the host
image. The authentication process involves extraction
of the perceptive logo as well statistical testing for
two-layer evidence. Results of the experimentation
using standard benchmarks demonstrate the robustness
and efficacy of the proposed watermarking approach.
Ownership proof could be established under various
hostile attacks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "12",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "content protection; copyright protection; image;
invisible watermarking; Watermarking",
}
@Article{Yiu:2008:ODC,
author = "Wai-Pun Ken Yiu and Shueng-Han Gary Chan",
title = "Offering data confidentiality for multimedia overlay
multicast: {Design} and analysis",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "13:1--13:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413866",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Application layer multicast (ALM) has been proposed to
overcome current limitations in IP multicast for
large-group multimedia communication. We address
offering data confidentiality tailored for ALM. To
achieve confidentiality, a node may need to
continuously {\em re-encrypt\/} packets before
forwarding them downstream. Furthermore, keys have to
be changed whenever there is a membership change,
leading to {\em rekey\/} processing overhead at the
nodes. For a large and dynamic group, these
re-encryption and rekeying operations incur high
processing overhead at the nodes. We propose and
analyze a scalable scheme called Secure Overlay
Multicast (SOM) which clusters ALM peers so as to
localize rekeying within a cluster and to limit
re-encryption at cluster boundaries, thereby minimizing
the total nodal processing overhead. We describe the
operations of SOM and compare its nodal processing
overhead with two other basic approaches, namely,
host-to-host encryption and whole group encryption. We
also present a simplified analytic model for SOM and
show that there exists an optimal cluster size to
minimize the total nodal processing overhead. By
comparing with a recently proposed ALM scheme (DT
protocol), SOM achieves a substantial reduction in
nodal processing overhead with similar network
performance in terms of network stress and delay.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "13",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Key management; multicast security; overlay multicast;
performance analysis",
}
@Article{Nakayama:2008:ECR,
author = "Minoru Nakayama and Yosiyuki Takahasi",
title = "Estimation of certainty for responses to
multiple-choice questionnaires using eye movements",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "14:1--14:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413867",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "To examine the feasibility of estimating the degree of
strength of belief (SOB) of responses using eye
movements, the scan paths of eye movements were
analyzed while subjects reviewed their own responses to
multiple choice tasks. All fixation points of eye
movements were classified into visual areas, or cells,
which corresponded with the positions of answers. Two
estimation procedures are proposed using eye-movement
data. The first one is identifying SOB using scan-path
transitions. By comparing subjects' reports of high and
low SOB and eye-movement estimations, a significant
correct rate of discrimination of SOB was observed.
When the threshold of discrimination was controlled, a
high rate of correct responses was obtained if it was
set at a low level.\par
The second procedure is conducting SOB discrimination
using support vector machines (SVM) trained with
features of fixations. Subjects' gazing features were
analyzed while they reviewed their own responses. A
discrimination model for SOB was trained with several
combinations of features to see whether performance of
a significant level could be obtained. As a result, a
trained model with 3 features (which consist of
interval time, vertical difference, and length between
fixations) can provide significant discrimination
performance for SOB.\par
These results provide evidence that strength of belief
can be estimated using eye movements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "14",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "certainty; Eye-movements; scan-path analysis; support
vector machines",
}
@Article{Shipman:2008:AVG,
author = "Frank Shipman and Andreas Girgensohn and Lynn Wilcox",
title = "Authoring, viewing, and generating hypervideo: an
overview of {Hyper-Hitchcock}",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "15:1--15:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413868",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Hyper-Hitchcock consists of three components for
creating and viewing a form of interactive video called
detail-on-demand video: a hypervideo editor, a
hypervideo player, and algorithms for automatically
generating hypervideo summaries. Detail-on-demand video
is a form of hypervideo that supports one hyperlink at
a time for navigating between video sequences. The
Hyper-Hitchcock editor enables authoring of
detail-on-demand video without programming and uses
video processing to aid in the authoring process. The
Hyper-Hitchcock player uses labels and keyframes to
support navigation through and back hyperlinks.
Hyper-Hitchcock includes techniques for automatically
generating hypervideo summaries of one or more videos
that take the form of multiple linear summaries of
different lengths with links from the shorter to the
longer summaries. User studies on authoring and viewing
provided insight into the various roles of links in
hypervideo and found that player interface design
greatly affects people's understanding of hypervideo
structure and the video they access.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "15",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Hypervideo; link generation; video editing; video
summarization",
}
@Article{He:2008:EED,
author = "Wenbo He and Klara Nahrstedt and Xue Liu",
title = "End-to-end delay control of multimedia applications
over multihop wireless links",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "16:1--16:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413869",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The proliferation of multimedia applications over
mobile, resource-constrained wireless networks has
raised the need for techniques that adapt these
applications both to clients' Quality of Service (QoS)
requirements and to network resource constraints. This
article investigates the upper-layer adaptation
mechanisms to achieve end-to-end delay control for
multimedia applications. The proposed adaptation
approach spans application layer, middleware layer and
network layer. In application layer, the requirement
adaptor dynamically changes the requirement levels
according to end-to-end delay measurement and
acceptable QoS requirements for the end-users. In
middleware layer, the priority adaptor is used to
dynamically adjust the service classes for applications
using feedback control theory. In network layer, the
service differentiation scheduler assigns different
network resources (e.g., bandwidth) to different
service classes. With the coordination of these three
layers, our approach can adaptively assign resources to
multimedia applications. To evaluate the impact of our
adaptation scheme, we built a real IEEE 802.11 ad hoc
network testbed. The testbed experiments show that the
proposed upper-layer adaptation for end-to-end delay
control successfully adjusts multimedia applications to
meet delay requirements in many scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "16",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "End-to-end delay QoS; wireless ad hoc networks",
}
@Article{Pan:2008:CBM,
author = "Leon Pan and Chang N. Zhang",
title = "A criterion-based multilayer access control approach
for multimedia applications and the implementation
considerations",
journal = j-TOMCCAP,
volume = "5",
number = "2",
pages = "17:1--17:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1413862.1413870",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:17 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, a novel criterion-based multilayer
access control (CBMAC) approach is presented to enhance
existing access control models such as Role-Based,
Mandatory, and Discretionary Access Control models to
support multilayer (multilevel) access control. The
proposed approach is based on a set of predefined
security criteria which are extracted from
authorization rules. The security attributes of objects
and users are specified by security criterion
expressions (serving as locks) and the elements
(serving as keys) of security criterion subsets
respectively. An object embedded with a number of
security criterion expressions becomes a secure object
while a user associated with a security criterion
subset is called a secure user. The multilayer access
control is achieved by evaluating the embedded security
criterion expressions (actuating locks) by the elements
(keys) in a user's security criterion subset. The paper
also provides the details of integrating the proposed
approach with existing access control models and
presents the implementation considerations of
Criterion-Based Role-Based Multilayer Access Control,
the integration of CBMAC and Role-Based Access
Control.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "17",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Multilayer access control; secure object; secure
permission; secure user; security criterion",
}
@Article{Candan:2009:ISS,
author = "K. Sel{\c{c}}uk Candan and Alberto {Del Bimbo} and
Carsten Griwodz and Alejandro Jaimes",
title = "Introduction to the special section for the best
papers of {ACM Multimedia 2008}",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "18:1--18:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556135",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "18",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Cesar:2009:FTE,
author = "Pablo Cesar and Dick C. A. Bulterman and Jack Jansen
and David Geerts and Hendrik Knoche and William
Seager",
title = "Fragment, tag, enrich, and send: {Enhancing} social
sharing of video",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "19:1--19:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556136",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The migration of media consumption to personal
computers retains distributed social viewing, but only
via nonsocial, strictly personal interfaces. This
article presents an architecture and implementation
for media sharing that allows for enhanced social
interactions among users. Using a mixed-device model,
our work allows targeted, personalized enrichment of
content. All recipients see common content, while
differentiated content is delivered to individuals via
their personal secondary screens. We describe the
goals, architecture, and implementation of our system
in this article. In order to validate our results, we
also present results from two user studies involving
disjoint sets of test participants.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "19",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Asynchronous media sharing; differentiated content
enrichment; secondary screens",
}
@Article{Knoche:2009:BPS,
author = "H. Knoche and M. A. Sasse",
title = "The big picture on small screens delivering acceptable
video quality in mobile {TV}",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "20:1--20:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556137",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Mobile TV viewers can change the viewing distance and
(on some devices) scale the picture to their preferred
viewing ratio, trading off size for angular resolution.
We investigated optimal trade-offs between size and
resolution through a series of studies. Participants
selected their preferred size and rated the
acceptability of the visual experience on a 200 ppi
device at a 4:3 aspect ratio. They preferred viewing
ratios similar to living room TV setups regardless of
the much lower resolution: at a minimum 14 pixels per
degree. While traveling on trains people required
videos with a height larger than 35 mm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "20",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Mobile multimedia consumption; resolution; size;
trade-off",
}
@Article{Mondet:2009:CPP,
author = "S{\'e}bastien Mondet and Wei Cheng and G{\'e}raldine Morin and
Romulus Grigoras and Fr{\'e}d{\'e}ric Boudon and Wei Tsang
Ooi",
title = "Compact and progressive plant models for streaming in
networked virtual environments",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "21:1--21:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556138",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Just as in the real world, plants are important
objects in virtual worlds for creating pleasant and
realistic environments, especially those involving
natural scenes. As such, much effort has been made in
realistic modeling of plants. As the trend moves
towards networked and distributed virtual environments,
however, the current models are inadequate as they are
not designed for progressive transmissions. In this
article, we fill in this gap by proposing a progressive
representation for plants based on generalized
cylinders. We model the shape and thickness of branches
in a plant as B{\'e}zier curves, group the curves
according to the similarity, and differentially code
the curves to represent the plant in a compact and
progressive manner. To facilitate the transmission of
the plants, we quantify the visual contribution of each
branch and use this weight in packet scheduling. We
show the efficiency of our representations and the
effectiveness of our packet scheduler through
experiments over a wide area network.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "21",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "multiresolution; networked virtual environment; plant
models; progressive coding; progressive transmission;
Streaming",
}
@Article{Wei:2009:CCM,
author = "Yong Wei and Suchendra M. Bhandarkar and Kang Li",
title = "Client-centered multimedia content adaptation",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "22:1--22:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556139",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The design and implementation of a client-centered
multimedia content adaptation system suitable for a
mobile environment comprising resource-constrained
handheld devices or clients is described. The primary
contributions of this work are: (1) the overall
architecture of the client-centered content adaptation
system, (2) a data-driven multi-level Hidden Markov
model (HMM)-based approach to perform both video
segmentation and video indexing in a single pass, and
(3) the formulation and implementation of a
Multiple-choice Multidimensional Knapsack Problem
(MMKP)-based video personalization strategy. In order
to segment and index video data, a video stream is
modeled at both the semantic unit level and video
program level. These models are learned entirely from
training data and no domain-dependent knowledge about
the structure of video programs is used. This makes the
system capable of handling various kinds of videos
without having to manually redefine the program model.
The proposed MMKP-based personalization strategy is
shown to include more relevant video content in
response to the client's request than the existing 0/1
knapsack problem and fractional knapsack problem-based
strategies, and is capable of satisfying multiple
client-side constraints simultaneously. Experimental
results on CNN news videos and Major League Soccer
(MLS) videos are presented and analyzed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "22",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "hidden Markov models; multiple choice multidimensional
knapsack problem; video indexing; Video
personalization",
}
@Article{Sivaram:2009:DMS,
author = "G. S. V. S. Sivaram and Mohan S. Kankanhalli and K. R.
Ramakrishnan",
title = "Design of multimedia surveillance systems",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "23:1--23:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556140",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article addresses the problem of how to select
the optimal combination of sensors and how to determine
their optimal placement in a surveillance region in
order to meet the given performance requirements at a
minimal cost for a multimedia surveillance system. We
propose to solve this problem by obtaining a
performance vector, with its elements representing the
performances of subtasks, for a given input combination
of sensors and their placement. Then we show that the
optimal sensor selection problem can be converted into
the form of an Integer Linear Programming problem (ILP) by
using a linear model for computing the optimal
performance vector corresponding to a sensor
combination. Optimal performance vector corresponding
to a sensor combination refers to the performance
vector corresponding to the optimal placement of a
sensor combination. To demonstrate the utility of our
technique, we design and build a surveillance system
consisting of PTZ (Pan-Tilt-Zoom) cameras and active
motion sensors for capturing faces. Finally, we show
experimentally that optimal placement of sensors based
on the design maximizes the system performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "23",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Performance vector; sensor selection and placement",
}
@Article{Liu:2009:SSE,
author = "Xiaotao Liu and Mark Corner and Prashant Shenoy",
title = "{\em {SEVA\/}}: {Sensor-enhanced} video annotation",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "24:1--24:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556141",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we study how a sensor-rich world can
be exploited by digital recording devices such as
cameras and camcorders to improve a user's ability to
search through a large repository of image and video
files. We design and implement a digital recording
system that records identities and locations of objects
(as advertised by their sensors) along with visual
images (as recorded by a camera). The process, which we
refer to as {\em Sensor-Enhanced Video Annotation
(SEVA)}, combines a series of correlation,
interpolation, and extrapolation techniques. It
produces a tagged stream that later can be used to
efficiently search for videos or frames containing
particular objects or people. We present detailed
experiments with a prototype of our system using both
stationary and mobile objects as well as GPS and
ultrasound. Our experiments show that: (i) SEVA has
zero error rates for static objects, except very close
to the boundary of the viewable area; (ii) for moving
objects or a moving camera, SEVA only misses objects
leaving or entering the viewable area by 1--2 frames;
(iii) SEVA can scale to 10 fast-moving objects using
current sensor technology; and (iv) SEVA runs online
using relatively inexpensive hardware.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "context-based retrieval; location-based services;
sensor-enhanced; Video annotation",
}
@Article{Wang:2009:MLS,
author = "Bing Wang and Wei Wei and Zheng Guo and Don Towsley",
title = "Multipath live streaming via {TCP}: {Scheme},
performance and benefits",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "25:1--25:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556142",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Motivated by the wide use of TCP for multimedia
streaming in practice and the increasing availability
of multipath between end hosts, we study multipath live
streaming via TCP in this article. We first design a
simple and practical TCP-based multipath streaming
scheme, named {\em Dynamic MPath-streaming
(DMP-streaming)}, which dynamically distributes packets
over multiple paths by {\em implicitly inferring\/} the
available bandwidths on these paths. To allow
systematic performance study, we develop an analytical
model for DMP-streaming and validate the model using
extensive {\em ns\/} simulation and Internet
experiments. We explore the parameter space of this
model and find that DMP-streaming generally provides
satisfactory performance when the aggregate achievable
TCP throughput is 1.6 times the video bitrate, when
allowing a few seconds of startup delay. Last, we
comment on the benefits of using multipath versus
single path for TCP-based streaming.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "25",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "multimedia streaming; Performance modeling",
}
@Article{Li:2009:PBR,
author = "Mingzhe Li and Mark Claypool and Robert Kinicki",
title = "Playout buffer and rate optimization for streaming
over {IEEE 802.11} wireless networks",
journal = j-TOMCCAP,
volume = "5",
number = "3",
pages = "26:1--26:??",
month = aug,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1556134.1556143",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:52:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Most streaming rate selection and buffer optimization
algorithms are developed for wired networks and can
perform poorly over wireless networks. Wireless MAC
layer behavior, such as rate adaptation,
retransmissions, and medium sharing, can significantly
degrade the effectiveness of current streaming
algorithms. This article presents the Buffer and Rate
Optimization for Streaming (BROS) algorithm to improve
streaming performance. BROS uses a bandwidth estimation
tool designed specifically for wireless networks and
models the relationship between buffer size, streaming
data rate, and available bandwidth distribution. BROS
optimizes the streaming data rate and initial buffer
size, resulting in a high data rate but with few frame
losses and buffer underflow events, while still keeping
a small initial buffer delay. BROS is implemented in
the Emulated Streaming (EmuS) client-server system and
evaluated on an IEEE 802.11 wireless testbed with
various wireless conditions. The evaluation shows that
BROS can effectively optimize the streaming rate and
initial buffer size based on wireless network bandwidth
conditions, thus achieving better performance than
static rate or buffer selection and jitter removal
buffers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "26",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Multimedia networking; playout buffer; streaming rate;
wireless networks",
}
@Article{Sauer:2009:MDC,
author = "Danielle Sauer and Yee-Hong Yang",
title = "Music-driven character animation",
journal = j-TOMCCAP,
volume = "5",
number = "4",
pages = "27:1--27:??",
month = oct,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1596990.1596991",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:53:03 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Music-driven character animation extracts musical
features from a song and uses them to create an
animation. This article presents a system that builds a
new animation directly from musical attributes, rather
than simply synchronizing it to the music like similar
systems. Using a simple script that identifies the
movements involved in the performance and their timing,
the user can easily control the animation of
characters. Another unique feature of the system is its
ability to incorporate multiple characters into the
same animation, both with synchronized and
unsynchronized movements. A system that integrates
Celtic dance movements is developed in this article. An
evaluation of the results shows that the majority of
animations are found to be appealing to viewers and
that altering the music can change the attractiveness
of the final result.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "27",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "Character animation; motion synthesis; music analysis;
primitive movements",
}
@Article{Deng:2009:SCA,
author = "Robert H. Deng and Yanjiang Yang",
title = "A study of content authentication in proxy-enabled
multimedia delivery systems: {Model}, techniques, and
applications",
journal = j-TOMCCAP,
volume = "5",
number = "4",
pages = "28:1--28:??",
month = oct,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1596990.1596992",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
bibdate = "Tue Mar 16 18:53:03 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Compared with the direct server-user approach, the
server-proxy-user architecture for multimedia delivery
promises significantly improved system scalability. The
introduction of the intermediary transcoding proxies
between content servers and end users in this
architecture, however, brings unprecedented challenges
to content security. In this article, we present a
systematic study on the end-to-end content
authentication problem in the server-proxy-user
context, where intermediary proxies transcode
multimedia content dynamically. We present a formal
model for the authentication problem, propose a
concrete construction for authenticating generic data
modality and formally prove its security. We then apply
the generic construction to authenticating specific
multimedia formats, for example, JPEG2000 code-streams
and MPEG-4 video streams. The prototype implementation
shows that our scheme is suitable for practical
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "28",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
keywords = "end-to-end authentication; Multimedia content
delivery; security",
}
@Article{Cha:2009:TVS,
  author =       "Jongeun Cha and Mohamad Eid and Abdulmotaleb {El
                 Saddik}",
  title =        "Touchable {$3$D} video system",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "4",
  pages =        "29:1--29:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596990.1596993",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:03 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Multimedia technologies are reaching the limits of
                 providing audio-visual media that viewers consume
                 passively. An important factor, which will ultimately
                 enhance the user's experience in terms of
                 impressiveness and immersion, is interaction. Among
                 daily life interactions, haptic interaction plays a
                 prominent role in enhancing the quality of experience
                 of users, and in promoting physical and emotional
                 development. Therefore, a critical step in multimedia
                 research is expected to bring the sense of touch, or
                 haptics, into multimedia systems and applications. This
                 article proposes a touchable 3D video system where
                 viewers can actively touch a video scene through a
                 force-feedback device, and presents the underlying
                 technologies in three functional components: (1)
                 contents generation, (2) contents transmission, and (3)
                 viewing and interaction. First of all, we introduce a
                 depth image-based haptic representation (DIBHR) method
                 that adds haptic and heightmap images, in addition to
                 the traditional depth image-based representation
                 (DIBR), to encode the haptic surface properties of the
                 video media. In this representation, the haptic image
                 contains the stiffness, static friction, and dynamic
                 friction, whereas the heightmap image contains
                 roughness of the video contents. Based on this
                 representation method, we discuss how to generate
                 synthetic and natural (real) video media through a 3D
                 modeling tool and a depth camera, respectively. Next,
                 we introduce a transmission mechanism based on the
                 MPEG-4 framework where new MPEG-4 BIFS nodes are
                 designed to describe the haptic scene. Finally, a
                 haptic rendering algorithm to compute the interaction
                 force between the scene and the viewer is described. As
                 a result, the performance of the haptic rendering
                 algorithm is evaluated in terms of computational time
                 and smooth contact force. It operates marginally within
                 a 1 kHz update rate that is required to provide stable
                 interaction force and provide smoother contact force
                 with the depth image that has high frequency
                 geometrical noise using a median filter.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "haptic rendering algorithm; Haptic surface properties;
                 video representation",
}
@Article{Benevenuto:2009:VIO,
  author =       "Fabr{\'\i}cio Benevenuto and Tiago Rodrigues and
                 Virgilio Almeida and Jussara Almeida and Keith Ross",
  title =        "Video interactions in online video social networks",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "4",
  pages =        "30:1--30:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596990.1596994",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:03 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article characterizes video-based interactions
                 that emerge from YouTube's video response feature,
                 which allows users to discuss themes and to provide
                 reviews for products or places using much richer media
                 than text. Based on crawled data covering a
                 representative subset of videos and users, we present a
                 characterization from two perspectives: the video
                 response view and the interaction network view. In
                 addition to providing valuable statistical models for
                 various characteristics, our study uncovers typical
                 user behavioral patterns in video-based environments
                 and shows evidence of opportunistic behavior.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "opportunistic behavior; promotion; social media;
                 social networks; video communication; Video
                 interactions; video spam; YouTube",
}
@Article{Erdmann:2009:IEB,
  author =       "Maike Erdmann and Kotaro Nakayama and Takahiro Hara
                 and Shojiro Nishio",
  title =        "Improving the extraction of bilingual terminology from
                 {Wikipedia}",
  journal =      j-TOMCCAP,
  volume =       "5",
  number =       "4",
  pages =        "31:1--31:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596990.1596995",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:03 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Research on the automatic construction of bilingual
                 dictionaries has achieved impressive results. Bilingual
                 dictionaries are usually constructed from parallel
                 corpora, but since these corpora are available only for
                 selected text domains and language pairs, the potential
                 of other resources is being explored as well.\par
                 In this article, we want to further pursue the idea of
                 using Wikipedia as a corpus for bilingual terminology
                 extraction. We propose a method that extracts
                 term-translation pairs from different types of
                 Wikipedia link information. After that, an SVM
                 classifier trained on the features of manually labeled
                 training data determines the correctness of unseen
                 term-translation pairs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Bilingual dictionary; link analysis; Wikipedia
                 mining",
}
@Article{Carlsson:2010:SSL,
  author =       "Niklas Carlsson and Derek L. Eager",
  title =        "Server selection in large-scale video-on-demand
                 systems",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671955",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video on demand, particularly with user-generated
                 content, is emerging as one of the most
                 bandwidth-intensive applications on the Internet. Owing
                 to content control and other issues, some
                 video-on-demand systems attempt to prevent downloading
                 and peer-to-peer content delivery. Instead, such
                 systems rely on server replication, such as via
                 third-party content distribution networks, to support
                 video streaming (or pseudostreaming) to their clients.
                 A major issue with such systems is the cost of the
                 required server resources.\par
                 By synchronizing the video streams for clients that
                 make closely spaced requests for the same video from
                 the same server, server costs (such as for retrieval of
                 the video data from disk) can be amortized over
                 multiple requests. A fundamental trade-off then arises,
                 however, with respect to server selection. Network
                 delivery cost is minimized by selecting the {\em
                 nearest\/} server, while server cost is minimized by
                 directing closely spaced requests for the same video to
                 a {\em common\/} server.\par
                 This article compares classes of server selection
                 policies within the context of a simple system model.
                 We conclude that: (i) server selection using dynamic
                 system state information (rather than only proximities
                 and average loads) can yield large improvements in
                 performance, (ii) deferring server selection for a
                 request as late as possible (i.e., until just before
                 streaming is to begin) can yield additional large
                 improvements, and (iii) within the class of policies
                 using dynamic state information and deferred selection,
                 policies using only ``local'' (rather than global)
                 request information are able to achieve most of the
                 potential performance gains.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "content distribution networks; modeling; Performance
                 analysis; server selection; video-on-demand",
}
@Article{Agarwal:2010:BRW,
  author =       "Parag Agarwal and Balakrishnan Prabhakaran",
  title =        "Blind robust watermarking of {$3$D} motion data",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671956",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The article addresses the problem of copyright
                 protection for 3D motion-captured data by designing a
                 robust blind watermarking mechanism. The mechanism
                 segments motion capture data and identifies clusters of
                 3D points per segment. A watermark can be embedded and
                 extracted within these clusters by using a proposed
                 extension of 3D quantization index modulation. The
                 watermarking scheme is blind in nature and the encoded
                 watermarks are shown to be imperceptible, and secure.
                 The resulting hiding capacity has bounds based on
                 cluster size. The watermarks are shown to be robust
                 against attacks such as uniform affine transformations
                 (scaling, rotation, and translation), cropping,
                 reordering, and noise addition. The time complexity for
                 watermark embedding and extraction is estimated as
                 O({\em n\/} log {\em n\/}) and O({\em n\/}$^2$ log {\em
                 n\/}), respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "blind; decoding; encoding; spatial; Watermarking",
}
@Article{Yang:2010:DMD,
  author =       "Bo Yang",
  title =        "{DSI}: a model for distributed multimedia semantic
                 indexing and content integration",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671957",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Considerable research has been done on the
                 content-based multimedia delivery and access in
                 distributed data repositories. As noted in the
                 literature, there is always a trade-off between
                 multimedia quality and access speed. In addition, the
                 overall performance is greatly determined by the
                 distribution of the multimedia data. In this article,
                 an unsupervised multimedia semantic integration
                 approach for a distributed infrastructure, the
                 Distributed Semantic Indexing (DSI), is presented that
                 addresses both the data quality and search performance.
                 With the ability of summarizing content information and
                 guiding data distribution, the proposed approach is
                 distinguished by: (1) logic-based representation and
                 concise abstraction of the semantic contents of
                 multimedia data, which are further integrated to form a
                 general overview of a multimedia data repository ---
                 content signature; (2) application of linguistic
                 relationships to construct a hierarchical metadata
                 based on the content signatures allowing imprecise
                 queries; and (3) achieving the optimal performance in
                 terms of search cost. The fundamental structure of the
                 proposed model is presented. The proposed scheme has
                 been simulated and the simulation results are analyzed
                 and compared against several other approaches that have
                 been advocated in the literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "distributed indexing; image retrieval; Semantic
                 representation",
}
@Article{Nystrom:2010:ECO,
  author =       "Marcus Nystr{\"o}m and Kenneth Holmqvist",
  title =        "Effect of compressed offline foveated video on viewing
                 behavior and subjective quality",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671958",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Offline foveation is a technique to improve the
                 compression efficiency of digitized video. The general
                 idea behind offline foveation is to blur video regions
                 where no or a small number of previewers look without
                 decreasing the subjective quality for later viewers. It
                 relies on the fact that peripheral vision is reduced
                 compared to central vision, and the observation that
                 during free-viewing humans' gaze positions generally
                 coincide when watching video. In this article, we
                 conduct two experiments to assess how offline foveation
                 affects viewing behavior and subjective quality. In the
                 first experiment, 15 subjects free-viewed six video
                 clips before and after offline foveation whereas in the
                 second experiment we had 17 subjects assessing the
                 quality of these videos after one, two, and three
                 consecutive viewings. Eye movements were measured
                 during the experiments. Results showed that, although
                 offline foveation prior to encoding with H.264 yielded
                 data reductions up to 52\% (20\% average) on the tested
                 videos, it had little or no effect on where people
                 looked, their intersubject dispersion, fixation
                 duration, saccade amplitude, or the experienced quality
                 during first-time viewing. However, seeing the videos
                 more than once increased the intersubject dispersion
                 and decreased the subjective quality. In view of these
                 results, we discuss the usage of offline foveated video
                 in practical applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Eye-tracking; foveation; subjective quality; video
                 compression",
}
@Article{Ivanov:2010:RTH,
  author =       "Yuri V. Ivanov and C. J. Bleakley",
  title =        "Real-time {H.264} video encoding in software with fast
                 mode decision and dynamic complexity control",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671959",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel real-time algorithm for
                 reducing and dynamically controlling the computational
                 complexity of an H.264 video encoder implemented in
                 software. A fast mode decision algorithm, based on a
                 Pareto-optimal macroblock classification scheme, is
                 combined with a dynamic complexity control algorithm
                 that adjusts the MB class decisions such that a
                 constant frame rate is achieved. The average coding
                 efficiency of the proposed algorithm was found to be
                 similar to that of conventional encoding operating at
                 half the frame rate. The proposed algorithm was found
                 to provide lower average bitrate and distortion than
                 static complexity scaling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "complexity; complexity control; fast mode decision;
                 H.264/AVC; mode decision; rate distortion; real time",
}
@Article{Hefeeda:2010:ASM,
  author =       "Mohamed Hefeeda and Kianoosh Mokhtarian",
  title =        "Authentication schemes for multimedia streams:
                 {Quantitative} analysis and comparison",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671954.1671960",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Mar 16 18:53:23 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With the rapid increase in the demand for multimedia
                 services, securing the delivery of multimedia content
                 has become an important issue. Accordingly, the problem
                 of multimedia stream authentication has received
                 considerable attention by previous research and various
                 solutions have been proposed. However, these solutions
                 have not been rigorously analyzed and contrasted to
                 each other, and thus their relative suitability for
                 different streaming environments is not clear. This
                 article presents comprehensive analysis and comparison
                 among different schemes proposed in the literature to
                 authenticate multimedia streams. Authentication schemes
                 for nonscalable and scalable multimedia streams are
                 analyzed. To conduct this analysis, we define five
                 important performance metrics, which are computation
                 cost, communication overhead, receiver buffer size,
                 delay, and tolerance to packet losses. We derive
                 analytic formulas for these metrics for all considered
                 authentication schemes to numerically analyze their
                 performance. In addition, we implement all schemes in a
                 simulator to study and compare their performance in
                 different environments. The parameters for the
                 simulator are carefully chosen to mimic realistic
                 settings. We draw several conclusions on the advantages
                 and disadvantages of each scheme. We extend our
                 analysis to authentication techniques for scalable
                 streams. We pay careful attention to the flexibility of
                 scalable streams and analyze its impacts on the
                 authentication schemes. Our analysis and comparison
                 reveal the merits and shortcomings of each scheme,
                 provide guidelines on choosing the most appropriate
                 scheme for a given multimedia streaming application,
                 and could stimulate designing new authentication
                 schemes or improving existing ones. For example, our
                 detailed analysis has led us to design a new
                 authentication scheme that combines the best features
                 of two previous schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "authentication schemes; Multimedia authentication;
                 multimedia security; multimedia streaming; scalable
                 coding; secure streaming",
}
@Article{Yang:2010:EMP,
  author =       "Zhenyu Yang and Wanmin Wu and Klara Nahrstedt and
                 Gregorij Kurillo and Ruzena Bajcsy",
  title =        "Enabling multi-party {$3$D} tele-immersive
                 environments with {{\em ViewCast}}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "2",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671962.1671963",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Aug 14 17:17:15 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Three-dimensional tele-immersive (3DTI) environments
                 have great potential to promote collaborative work
                 among geographically distributed users. However, most
                 existing 3DTI systems only work with two sites due to
                 the huge demand of resources and the lack of a simple
                 yet powerful networking model to handle connectivity,
                 scalability, and quality-of-service (QoS)
                 guarantees.\par
                 In this article, we explore the design space from the
                 angle of multi-stream management to enable multi-party
                 3DTI communication. Multiple correlated 3D video
                 streams are employed to provide a comprehensive
                 representation of the physical scene in each 3DTI
                 environment, and are rendered together to establish a
                 common cyberspace among all participating 3DTI
                 environments. The existence of multi-stream correlation
                 provides the unique opportunity for new approaches in
                 QoS provisioning. Previous work mostly concentrated on
                 compression and adaptation techniques on the per-stream
                 basis while ignoring the application layer semantics
                 and the coordination required among streams. We propose
                 an innovative and generalized {\em ViewCast\/} model to
                 coordinate the multi-stream content dissemination over
                 an overlay network. ViewCast leverages view semantics
                 in 3D free-viewpoint video systems to fill the gap
                 between high-level user interest and low-level stream
                 management. In ViewCast, only the view information is
                 specified by the user/application, while the underlying
                 control dynamically performs stream differentiation,
                 selection, coordination, and dissemination. We present
                 the details of ViewCast and evaluate it through both
                 simulation and 3DTI sessions among tele-immersive
                 environments residing in different institutes across
                 the Internet2. Our experimental results demonstrate the
                 implementation feasibility and performance enhancement
                 of ViewCast in supporting multi-party 3DTI
                 collaboration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "3D tele-immersion; application level multicast;
                 distributed multimedia system; multi-stream
                 coordination; networking protocol; QoS adaptation",
}
@Article{Wu:2010:ELT,
  author =       "Junwen Wu and Mohan M. Trivedi",
  title =        "An eye localization, tracking and blink pattern
                 recognition system: {Algorithm} and evaluation",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "2",
  pages =        "8:1--8:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671962.1671964",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Aug 14 17:17:15 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This study is to investigate the fundamental problems
                 of, (1) facial feature detection and localization,
                 especially eye features; and (2) eye dynamics,
                 including tracking and blink detection. We first
                 describe our contribution to eye localization.
                 Following that, we discuss a simultaneous eye tracking
                 and blink detection system. Facial feature detection is
                 solved in a general object detection framework and its
                 performance for eye localization is presented. A binary
                 tree representation based on feature dependency
                 partitions the object feature space in a coarse to fine
                 manner. In each compact feature subspace, independent
                 component analysis (ICA) is used to get the independent
                 sources, whose probability density functions (PDFs) are
                 modeled by Gaussian mixtures. When applying this
                 representation for the task of eye detection, a
                 subwindow is used to scan the entire image and each
                 obtained image patch is examined using Bayesian
                 criteria to determine the presence of an eye subject.
                 After the eyes are automatically located with binary
                 tree-based probability learning, interactive particle
                 filters are used for simultaneously tracking the eyes
                 and detecting the blinks. The particle filters use
                 classification-based observation models, in which the
                 posterior probabilities are evaluated by logistic
                 regressions in tensor subspaces. Extensive experiments
                 are used to evaluate the performance from two aspects,
                 (1) blink detection rate and the accuracy of blink
                 duration in terms of the frame numbers; (2) eye
                 tracking accuracy. We also present an experimental
                 setup for obtaining the benchmark data in tracking
                 accuracy evaluation. The experimental evaluation
                 demonstrates the capability of this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Eye blink detection; human computer interface;
                 particle filtering; video processing",
}
@Article{Jin:2010:DMN,
  author =       "Xing Jin and S.-H. Gary Chan",
  title =        "Detecting malicious nodes in peer-to-peer streaming by
                 peer-based monitoring",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671962.1671965",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Aug 14 17:17:15 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Current peer-to-peer (P2P) streaming systems often
                 assume that nodes cooperate to upload and download
                 data. However, in the open environment of the Internet,
                 this is not necessarily true and there exist malicious
                 nodes in the system. In this article, we study
                 malicious actions of nodes that can be detected through
                 peer-based monitoring. We require each node to monitor
                 the data received and to periodically send monitoring
                 messages about its neighbors to some trustworthy nodes.
                 To efficiently store and search messages among multiple
                 trustworthy nodes, we organize trustworthy nodes into a
                 threaded binary tree. Trustworthy nodes also
                 dynamically redistribute monitoring messages among
                 themselves to achieve load balancing. Our simulation
                 results show that this scheme can efficiently detect
                 malicious nodes with high accuracy, and that the
                 dynamic redistribution method can achieve good load
                 balancing among trustworthy nodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Malicious nodes; peer monitoring; peer-to-peer
                 streaming",
}
@Article{Chiu:2010:FMH,
  author =       "Chih-Yi Chiu and Hsin-Min Wang and Chu-Song Chen",
  title =        "Fast min-hashing indexing and robust spatio-temporal
                 matching for detecting video copies",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "2",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671962.1671966",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Aug 14 17:17:15 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The increase in the number of video copies, both legal
                 and illegal, has become a major problem in the
                 multimedia and Internet era. In this article, we
                 propose a novel method for detecting various video
                 copies in a video sequence. To achieve fast and robust
                 detection, the method fully integrates several
                 components, namely the min-hashing signature to
                 compactly represent a video sequence, a spatio-temporal
                 matching scheme to accurately evaluate video similarity
                 compiled from the spatial and temporal aspects, and
                 some speedup techniques to expedite both min-hashing
                 indexing and spatio-temporal matching. The results of
                 experiments demonstrate that, compared to several
                 baseline methods with different feature descriptors and
                 matching schemes, the proposed method which combines
                 both global and local feature descriptors yields the
                 best performance when encountering a variety of video
                 transformations. The method is very fast, requiring
                 approximately 0.06 seconds to search for copies of a
                 thirty-second video clip in a six-hour video
                 sequence.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Content-based copy detection; histogram pruning;
                 near-duplicate",
}
@Article{Sarhan:2010:WTP,
  author =       "Nabil J. Sarhan and Mohammad A. Alsmirat and Musab
                 Al-Hadrusi",
  title =        "Waiting-time prediction in scalable on-demand video
                 streaming",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "2",
  pages =        "11:1--11:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1671962.1671967",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Sat Aug 14 17:17:15 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Providing video streaming users with expected waiting
                 times enhances their perceived quality-of-service (QoS)
                 and encourages them to wait. In the absence of any
                 waiting-time feedback, users are more likely to defect
                 because of the uncertainty as to when their services
                 will start. We analyze waiting-time predictability in
                 scalable video streaming. We propose two prediction
                 schemes and study their effectiveness when applied with
                 various stream merging techniques and scheduling
                 policies. The results demonstrate that the waiting time
                 can be predicted accurately, especially when enhanced
                 cost-based scheduling is applied. The combination of
                 waiting-time prediction and cost-based scheduling leads
                 to outstanding performance benefits.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
  keywords =     "Scheduling; stream merging; time-of-service
                 guarantees; video streaming; waiting-time prediction",
}
@Article{Xu:2010:IBP,
  author =       "Changsheng Xu and Eckehard Steinbach and Abdulmotaleb
                 {El Saddik} and Michelle Zhou",
  title =        "Introduction to the best papers of {ACM Multimedia
                 2009}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1830482",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zha:2010:VQS,
  author =       "Zheng-Jun Zha and Linjun Yang and Tao Mei and Meng
                 Wang and Zengfu Wang and Tat-Seng Chua and Xian-Sheng
                 Hua",
  title =        "Visual query suggestion: {Towards} capturing user
                 intent in {Internet} image search",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823747",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Jiang:2010:AVA,
  author =       "Wei Jiang and Courtenay Cotton and Shih-Fu Chang and
                 Dan Ellis and Alexander C. Loui",
  title =        "Audio-visual atoms for generic video concept
                 classification",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823748",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{DeOliveira:2010:LND,
  author =       "Rodrigo {De Oliveira} and Mauro Cherubini and Nuria
                 Oliver",
  title =        "Looking at near-duplicate videos from a human-centric
                 perspective",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "15:1--15:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823749",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yin:2010:LEC,
  author =       "Hao Yin and Xuening Liu and Tongyu Zhan and Vyas Sekar
                 and Feng Qiu and Chuang Lin and Hui Zhang and Bo Li",
  title =        "{LiveSky}: {Enhancing} {CDN} with {P2P}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "16:1--16:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823750",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Money:2010:EEL,
  author =       "Arthur G. Money and Harry Agius",
  title =        "{ELVIS}: {Entertainment-Led VIdeo Summaries}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "17:1--17:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823751",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hoi:2010:SSD,
  author =       "Steven C. H. Hoi and Wei Liu and Shih-Fu Chang",
  title =        "Semi-supervised distance metric learning for
                 collaborative image retrieval and clustering",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "18:1--18:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823752",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Maddage:2010:WLA,
  author =       "Namunu C. Maddage and Khe Chai Sim and Haizhou Li",
  title =        "Word level automatic alignment of music and lyrics
                 using vocal synthesis",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "19:1--19:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823753",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Qudah:2010:EDD,
  author =       "Bashar Qudah and Nabil J. Sarhan",
  title =        "Efficient delivery of on-demand video streams to
                 heterogeneous receivers",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "20:1--20:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823754",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gomes:2010:STA,
  author =       "Jo{\~a}o V. P. Gomes and Pedro R. M. In{\'a}cio and
                 Branka Lakic and M{\'a}rio M. Freire and Henrique J. A.
                 {Da Silva} and Paulo P. Monteiro",
  title =        "Source traffic analysis",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1823755",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Boll:2010:CPA,
  author =       "Susanne Boll and Jiebo Luo and Ramesh Jain and Dong
                 Xu",
  title =        "Call for papers: {ACM Transactions on Multimedia
                 Computing, Communications and Applications} special
                 issue on social media",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "3",
  pages =        "22:1--22:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1823746.1837254",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2010:OOD,
  author =       "Ralf Steinmetz",
  title =        "Obituary to our dear friend {Professor Dr. Nicolas D.
                 Georganas, PhD}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "23:1--23:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865107",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Haenselmann:2010:FSI,
  author =       "Thomas Haenselmann",
  title =        "Foreword to the special issue on multimedia sensor
                 fusion",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "24:1--24:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865108",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2010:MBA,
  author =       "Xiangyu Wang and Mohan Kankanhalli",
  title =        "{MultiFusion}: a boosting approach for multimedia
                 fusion",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "25:1--25:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865109",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chetty:2010:MSF,
  author =       "Girija Chetty and Matthew White",
  title =        "Multimedia sensor fusion for retrieving identity in
                 biometric access control systems",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "26:1--26:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865110",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Friedland:2010:DAS,
  author =       "Gerald Friedland and Chuohao Yeo and Hayley Hung",
  title =        "Dialocalization: {Acoustic} speaker diarization and
                 visual localization as joint optimization problem",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "27:1--27:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865111",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Rahman:2010:SGA,
  author =       "Abu Saleh Md Mahfujur Rahman and M. Anwar Hossain and
                 Abdulmotaleb {El Saddik}",
  title =        "Spatial-geometric approach to physical mobile
                 interaction based on accelerometer and {IR} sensory
                 data fusion",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "28:1--28:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865112",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2010:EMT,
  author =       "Zhenyu Yang and Wanmin Wu and Klara Nahrstedt and
                 Gregorij Kurillo and Ruzena Bajcsy",
  title =        "Enabling multiparty {$3$D} tele-immersive environments
                 with {ViewCast}",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "29:1--29:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865113",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Marshall:2010:OCM,
  author =       "Damien Marshall and S{\'e}amus McLoone and Tom{\'a}s
                 Ward",
  title =        "Optimizing consistency by maximizing bandwidth usage
                 in distributed interactive applications",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "30:1--30:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865114",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Vu:2010:UOC,
  author =       "Long Vu and Indranil Gupta and Klara Nahrstedt and Jin
                 Liang",
  title =        "Understanding overlay characteristics of a large-scale
                 peer-to-peer {IPTV} system",
  journal =      j-TOMCCAP,
  volume =       "6",
  number =       "4",
  pages =        "31:1--31:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1865106.1865115",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Tue Nov 23 10:03:16 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Meyer:2011:MRL,
  author =       "Marek Meyer and Christoph Rensing and Ralf Steinmetz",
  title =        "Multigranularity reuse of learning resources",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870122",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bouyakoub:2011:SBI,
  author =       "Samia Bouyakoub and Abdelkader Belkhir",
  title =        "{SMIL} builder: an incremental authoring tool for
                 {SMIL Documents}",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870123",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hossain:2011:MAQ,
  author =       "M. Anwar Hossain and Pradeep K. Atrey and Abdulmotaleb
                 {El Saddik}",
  title =        "Modeling and assessing quality of information in
                 multisensor multimedia monitoring systems",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870124",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhu:2011:NDK,
  author =       "Jianke Zhu and Steven C. H. Hoi and Michael R. Lyu and
                 Shuicheng Yan",
  title =        "Near-duplicate keyframe retrieval by semi-supervised
                 learning and nonrigid image matching",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870125",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hsu:2011:FCL,
  author =       "Cheng-Hsin Hsu and Mohamed Hefeeda",
  title =        "A framework for cross-layer optimization of video
                 streaming in wireless networks",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870126",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chandra:2011:EAS,
  author =       "Surendar Chandra and Xuwen Yu",
  title =        "An empirical analysis of serendipitous media sharing
                 among campus-wide wireless users",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870121.1870127",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:41 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gopinathan:2011:OLM,
  author =       "Ajay Gopinathan and Zongpeng Li",
  title =        "Optimal layered multicast",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925102",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hsu:2011:USS,
  author =       "Cheng-Hsin Hsu and Mohamed Hefeeda",
  title =        "Using simulcast and scalable video coding to
                 efficiently control channel switching delay in mobile
                 {TV} broadcast networks",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925103",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Jin:2011:KDH,
  author =       "Yohan Jin and Balakrishnan Prabhakaran",
  title =        "Knowledge discovery from {$3$D} human motion streams
                 through semantic dimensional reduction",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925104",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Cheng:2011:MPM,
  author =       "Wei Cheng and Wei Tsang Ooi and Sebastien Mondet and
                 Romulus Grigoras and G{\'e}raldine Morin",
  title =        "Modeling progressive mesh streaming: {Does} data
                 dependency matter?",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925105",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bagchi:2011:FAD,
  author =       "Susmit Bagchi",
  title =        "A fuzzy algorithm for dynamically adaptive multimedia
                 streaming",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925106",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hsu:2011:SMV,
  author =       "Cheng-Hsin Hsu and Mohamed Hefeeda",
  title =        "Statistical multiplexing of variable-bit-rate videos
                 streamed to mobile devices",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1925101.1925107",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Wed Mar 16 09:25:42 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2011:EN,
  author =       "Ralf Steinmetz",
  title =        "Editorial notice",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000487",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Korshunov:2011:VQF,
  author =       "Pavel Korshunov and Wei Tsang Ooi",
  title =        "Video quality for face detection, recognition, and
                 tracking",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000488",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2011:PCI,
  author =       "Pei-Yu Lin and Jung-San Lee and Chin-Chen Chang",
  title =        "Protecting the content integrity of digital imagery
                 with fidelity preservation",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "15:1--15:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000489",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{VanLeuken:2011:SVO,
  author =       "Reinier H. {Van Leuken} and Remco C. Veltkamp",
  title =        "Selecting vantage objects for similarity indexing",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "16:1--16:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000490",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Feng:2011:SRI,
  author =       "Wu-Chi Feng and Thanh Dang and John Kassebaum and Tim
                 Bauman",
  title =        "Supporting region-of-interest cropping through
                 constrained compression",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "17:1--17:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000491",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2011:DBA,
  author =       "Qingzhong Liu and Andrew H. Sung and Mengyu Qiao",
  title =        "Derivative-based audio steganalysis",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "18:1--18:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000492",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2011:GDO,
  author =       "Frederick W. B. Li and Rynson W. H. Lau and Danny
                 Kilis and Lewis W. F. Li",
  title =        "Game-on-demand: an online game engine based on
                 geometry streaming",
  journal =      j-TOMCCAP,
  volume =       "7",
  number =       "3",
  pages =        "19:1--19:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2000486.2000493",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  bibdate =      "Mon Sep 5 17:00:22 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Shirmohammadi:2011:IAM,
  author =       "Shervin Shirmohammadi and Jiebo Luo and Jie Yang and
                 Abdulmotaleb {El Saddik}",
  title =        "Introduction to {ACM Multimedia 2010} best paper
                 candidates",
  journal =      j-TOMCCAP,
  volume =       "7S",
  number =       "1",
  pages =        "20:1--20:??",
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2037676.2037677",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun Nov 6 06:36:59 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bhattacharya:2011:HAA,
  author =       "Subhabrata Bhattacharya and Rahul Sukthankar and
                 Mubarak Shah",
  title =        "A holistic approach to aesthetic enhancement of
                 photographs",
  journal =      j-TOMCCAP,
  volume =       "7S",
  number =       "1",
  pages =        "21:1--21:??",
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2037676.2037678",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun Nov 6 06:36:59 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Tan:2011:URS,
  author =       "Shulong Tan and Jiajun Bu and Chun Chen and Bin Xu and
                 Can Wang and Xiaofei He",
  title =        "Using rich social media information for music
                 recommendation via hypergraph model",
  journal =      j-TOMCCAP,
  volume =       "7S",
  number =       "1",
  pages =        "22:1--22:??",
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2037676.2037679",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun Nov 6 06:36:59 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Milani:2011:CAE,
author = "Simone Milani and Giancarlo Calvagno",
title = "A cognitive approach for effective coding and
transmission of {$3$D} video",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "23:1--23:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037680",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "23",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hong:2011:VAE,
author = "Richang Hong and Meng Wang and Xiao-Tong Yuan and
Mengdi Xu and Jianguo Jiang and Shuicheng Yan and
Tat-Seng Chua",
title = "Video accessibility enhancement for hearing-impaired
users",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "24:1--24:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037681",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Boll:2011:ISI,
author = "Susanne Boll and Ramesh Jain and Jiebo Luo and Dong
Xu",
title = "Introduction to special issue on social media",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "25:1--25:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037682",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "25",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2011:EOM,
author = "Yu-Ching Lin and Yi-Hsuan Yang and Homer H. Chen",
title = "Exploiting online music tags for music emotion
classification",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "26:1--26:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037683",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "26",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Rabbath:2011:ACP,
author = "Mohamad Rabbath and Philipp Sandhaus and Susanne
Boll",
title = "Automatic creation of photo books from stories in
social media",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "27:1--27:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037684",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "27",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hu:2011:RAI,
author = "Weiming Hu and Haiqiang Zuo and Ou Wu and Yunfei Chen
and Zhongfei Zhang and David Suter",
title = "Recognition of adult images, videos, and web page
bags",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "28:1--28:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037685",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "28",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2011:SSC,
author = "Yu-Ru Lin and K. Sel{\c{c}}uk Candan and Hari
Sundaram and Lexing Xie",
title = "{SCENT}: {Scalable} compressed monitoring of evolving
multirelational social networks",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "29:1--29:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037686",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "29",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Sang:2011:BCT,
author = "Jitao Sang and Changsheng Xu",
title = "Browse by chunks: {Topic} mining and organizing on
web-scale social media",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "30:1--30:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037687",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "30",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ji:2011:MFL,
author = "Rongrong Ji and Yue Gao and Bineng Zhong and Hongxun
Yao and Qi Tian",
title = "Mining {\tt flickr} landmarks by modeling
reconstruction sparsity",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "31:1--31:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037688",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "31",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Mandel:2011:CTI,
author = "Michael I. Mandel and Razvan Pascanu and Douglas Eck
and Yoshua Bengio and Luca M. Aiello and Rossano
Schifanella and Filippo Menczer",
title = "Contextual tag inference",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "32:1--32:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037689",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "32",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Biel:2011:VCB,
author = "Joan-Isaac Biel and Daniel Gatica-Perez",
title = "{VlogSense}: {Conversational} behavior and social
attention in {YouTube}",
journal = j-TOMCCAP,
volume = "7S",
number = "1",
pages = "33:1--33:??",
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2037676.2037690",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun Nov 6 06:36:59 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "33",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Anonymous:2011:TCO,
author = "Anonymous",
title = "Table of Contents: Online Supplement Volume {7S},
Number 1",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "34:1--34:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043620",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "34",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hong:2011:BSE,
author = "Richang Hong and Jinhui Tang and Hung-Khoon Tan and
Chong-Wah Ngo and Shuicheng Yan and Tat-Seng Chua",
title = "Beyond search: Event-driven summarization for {Web}
videos",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "35:1--35:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043613",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "35",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Kuo:2011:TPQ,
author = "Wen-Kuang Kuo and Kuo-Wei Wu",
title = "Traffic prediction and {QoS} transmission of real-time
live {VBR} videos in {WLANs}",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "36:1--36:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043614",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "36",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Maddage:2011:BSS,
author = "Namunu C. Maddage and Haizhou Li",
title = "Beat space segmentation and octave scale cepstral
feature for sung language recognition in pop music",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "37:1--37:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043615",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "37",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Santini:2011:ECQ,
author = "Simone Santini",
title = "Efficient computation of queries on feature streams",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "38:1--38:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043616",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "38",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Verdugo:2011:IFC,
author = "Renato Verdugo and Miguel Nussbaum and Pablo Corro and
Pablo Nu{\~n}ez and Paula Navarrete",
title = "Interactive films and coconstruction",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "39:1--39:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043617",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "39",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghandeharizadeh:2011:DCC,
author = "Shahram Ghandeharizadeh and Shahin Shayandeh",
title = "Domical cooperative caching for streaming media in
wireless home networks",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "40:1--40:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043618",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "40",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghandeharizadeh:2011:CPS,
author = "Shahram Ghandeharizadeh and Shahin Shayandeh",
title = "Call for papers: Special issue on {$3$D} mobile
multimedia",
journal = j-TOMCCAP,
volume = "7",
number = "4",
pages = "41:1--41:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043612.2043619",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Dec 15 08:53:32 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "41",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2012:ENC,
author = "Ralf Steinmetz",
title = "Editorial note and call for nominations: {Nicolas D.
Georganas} best paper award",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071397",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghinea:2012:SSS,
author = "Gheorghita Ghinea and Oluwakemi Ademoye",
title = "The sweet smell of success: Enhancing multimedia
applications with olfaction",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071398",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Olfaction, or smell, is one of the last challenges
which multimedia applications have to conquer. As far
as computerized smell is concerned, there are several
difficulties to overcome, particularly those associated
with the ambient nature of smell. In this article, we
present results from an empirical study exploring
users' perception of olfaction-enhanced multimedia
displays. Findings show that olfaction significantly
adds to the user multimedia experience. Moreover, use
of olfaction leads to an increased sense of reality and
relevance. Our results also show that users are
tolerant of the interference and distortion effects
caused by olfactory effect in multimedia.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hefeeda:2012:DET,
author = "Mohamed Hefeeda and Cheng-Hsin Hsu",
title = "Design and evaluation of a testbed for mobile {TV}
networks",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071399",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents the design of a complete,
open-source, testbed for broadcast networks that offer
mobile TV services. Although basic architectures and
protocols have been developed for such networks,
detailed performance tuning and analysis are still
needed, especially when these networks scale to serve
many diverse TV channels to numerous subscribers. The
detailed performance analysis could also motivate
designing new protocols and algorithms for enhancing
future mobile TV networks. Currently, many researchers
evaluate the performance of mobile TV networks using
simulation and/or theoretical modeling methods. These
methods, while useful for early assessment, typically
abstract away many necessary details of actual, fairly
complex, networks. Therefore, an open-source platform
for evaluating new ideas in a real mobile TV network is
needed. This platform is currently not possible with
commercial products, because they are sold as black
boxes without the source code. In this article, we
summarize our experiences in designing and implementing
a testbed for mobile TV networks. We integrate
off-the-shelf hardware components with carefully
designed software modules to realize a scalable testbed
that covers almost all aspects of real networks. We use
our testbed to empirically analyze various performance
aspects of mobile TV networks and validate/refute
several claims made in the literature as well as
discover/quantify multiple important performance
tradeoffs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "3",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2012:DMS,
author = "Yu-Ru Lin and Hari Sundaram and Munmun {De Choudhury}
and Aisling Kelliher",
title = "Discovering multirelational structure in social media
streams",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071400",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we present a novel algorithm to
discover multirelational structures from social media
streams. A media item such as a photograph exists as
part of a meaningful interrelationship among several
attributes, including time, visual content, users, and
actions. Discovery of such relational structures
enables us to understand the semantics of human
activity and has applications in content organization,
recommendation algorithms, and exploratory social
network analysis. We are proposing a novel nonnegative
matrix factorization framework to characterize
relational structures of group photo streams. The
factorization incorporates image content features and
contextual information. The idea is to consider a
cluster as having similar relational patterns; each
cluster consists of photos relating to similar content
or context. Relations represent different aspects of
the photo stream data, including visual content,
associated tags, photo owners, and post times. The
extracted structures minimize the mutual information of
the predicted joint distribution. We also introduce a
relational modularity function to determine the
structure cost penalty, and hence determine the number
of clusters. Extensive experiments on a large Flickr
dataset suggest that our approach is able to extract
meaningful relational patterns from group photo
streams. We evaluate the utility of the discovered
structures through a tag prediction task and through a
user study. Our results show that our method based on
relational structures, outperforms baseline methods,
including feature and tag frequency based techniques,
by 35\%--420\%. We have conducted a qualitative user
study to evaluate the benefits of our framework in
exploring group photo streams. The study indicates that
users found the extracted clustering results clearly
represent major themes in a group; the clustering
results not only reflect how users describe the group
data but often lead the users to discover the evolution
of the group activity.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "4",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Cheng:2012:EIC,
author = "Xu Cheng and Jiangchuan Liu",
title = "Exploring interest correlation for peer-to-peer
socialized video sharing",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071401",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The last five years have witnessed an explosion of
networked video sharing, represented by YouTube, as a
new killer Internet application. Their sustainable
development however is severely hindered by the
intrinsic limit of their client/server architecture. A
shift to the peer-to-peer paradigm has been widely
suggested with success already shown in live video
streaming and movie-on-demand. Unfortunately, our
latest measurement demonstrates that short video clips
exhibit drastically different statistics, which would
simply render these existing solutions suboptimal, if
not entirely inapplicable. Our long-term measurement
over five million YouTube videos, on the other hand,
reveals interesting social networks with strong
correlation among the videos, thus opening new
opportunities to explore. In this article, we present
NetTube, a novel peer-to-peer assisted delivering
framework that explores the user interest correlation
for short video sharing. We address a series of key
design issues to realize the system, including a
bi-layer overlay, an efficient indexing scheme, a
delay-aware scheduling mechanism, and a prefetching
strategy leveraging interest correlation. We evaluate
NetTube through both simulations and prototype
experiments, which show that it greatly reduces the
server workload, improves the playback quality and
scales well.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "5",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Mei:2012:ITC,
author = "Tao Mei and Lusong Li and Xian-Sheng Hua and Shipeng
Li",
title = "{ImageSense}: Towards contextual image advertising",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071402",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The daunting volumes of community-contributed media
contents on the Internet have become one of the primary
sources for online advertising. However, conventional
advertising treats image and video advertising as
general text advertising by displaying relevant ads
based on the contents of the Web page, without
considering the inherent characteristics of visual
contents. This article presents a contextual
advertising system driven by images, which
automatically associates relevant ads with an image
rather than the entire text in a Web page and
seamlessly inserts the ads in the nonintrusive areas
within each individual image. The proposed system,
called ImageSense, supports scalable advertising of,
from root to node, Web sites, pages, and images. In
ImageSense, the ads are selected based on not only
textual relevance but also visual similarity, so that
the ads yield contextual relevance to both the text in
the Web page and the image content. The ad insertion
positions are detected based on image salience, as well
as face and text detection, to minimize intrusiveness
to the user. We evaluate ImageSense on a large-scale
real-world images and Web pages, and demonstrate the
effectiveness of ImageSense for online image
advertising.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "6",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Snidaro:2012:FMV,
author = "Lauro Snidaro and Ingrid Visentini and Gian Luca
Foresti",
title = "Fusing multiple video sensors for surveillance",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071403",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Real-time detection, tracking, recognition, and
activity understanding of moving objects from multiple
sensors represent fundamental issues to be solved in
order to develop surveillance systems that are able to
autonomously monitor wide and complex environments. The
algorithms that are needed span therefore from image
processing to event detection and behaviour
understanding, and each of them requires dedicated
study and research. In this context, sensor fusion
plays a pivotal role in managing the information and
improving system performance. Here we present a novel
fusion framework for combining the data coming from
multiple and possibly heterogeneous sensors observing a
surveillance area.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "7",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Huang:2012:TAM,
author = "Jiun-Long Huang and Shih-Chuan Chiu and Man-Kwan
Shan",
title = "Towards an automatic music arrangement framework using
score reduction",
journal = j-TOMCCAP,
volume = "8",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2071396.2071404",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:02 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Score reduction is a process that arranges music for a
target instrument by reducing original music. In this
study we present a music arrangement framework that
uses score reduction to automatically arrange music for
a target instrument. The original music is first
analyzed to determine the type of arrangement element
of each section, then the phrases are identified and
each is assigned a utility according to its type of
arrangement element. For a set of utility-assigned
phrases, we transform the music arrangement into an
optimization problem and propose a phrase selection
algorithm. The music is arranged by selecting
appropriate phrases satisfying the playability
constraints of a target instrument. Using the proposed
framework, we implement a music arrangement system for
the piano. An approach similar to Turing test is used
to evaluate the quality of the music arranged by our
system. The experiment results show that our system is
able to create viable music for the piano.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "8",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2012:EN,
author = "Ralf Steinmetz",
title = "Editorial note",
journal = j-TOMCCAP,
volume = "8s",
number = "1",
pages = "9:1--9:??",
month = feb,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2089085.2089086",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Mar 16 15:56:04 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "9",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2012:BET,
  author          = {Dongyu Liu and Fei Li and Bo Shen and Songqing Chen},
  title           = {Building an efficient transcoding overlay for {P2P}
                     streaming to heterogeneous devices},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {10:1--10:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089087},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {With the increasing deployment of Internet P2P/overlay
                     streaming systems, more and more clients use mobile
                     devices, such as smart phones and PDAs, to access these
                     Internet streaming services. Compared to wired
                     desktops, mobile devices normally have a smaller screen
                     size, a less color depth, and lower bandwidth and thus
                     cannot correctly and effectively render and display the
                     data streamed to desktops. To address this problem, in
                     this paper, we propose PAT (Peer-Assisted Transcoding)
                     to enable effective online transcoding in P2P/overlay
                     streaming. PAT has the following unique features.
                     First, it leverages active peer cooperation without
                     demanding infrastructure support such as transcoding
                     servers. Second, as online transcoding is
                     computationally intensive while the various devices
                     used by participating clients may have limited
                     computing power and related resources (e.g., battery,
                     bandwidth), an additional overlay, called metadata
                     overlay, is constructed to instantly share the
                     intermediate transcoding result of a transcoding
                     procedure with other transcoding nodes to minimize the
                     total computing overhead in the system. The
                     experimental results collected within a realistically
                     simulated testbed show that by consuming 6\% extra
                     bandwidth, PAT could save up to 58\% CPU cycles for
                     online transcoding.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Shen:2012:IFP,
  author          = {Zhijie Shen and Roger Zimmermann},
  title           = {{ISP}-friendly {P2P} live streaming: a roadmap to
                     realization},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {11:1--11:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089088},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Peer-to-Peer (P2P) applications generate large amounts
                     of Internet network traffic. The wide-reaching
                     connectivity of P2P systems is creating resource
                     inefficiencies for network providers. Recent studies
                     have demonstrated that localizing cross-ISP (Internet
                     service provider) traffic can mitigate this challenge.
                     However, bandwidth sensitivity and display quality
                     requirements complicate the ISP-friendly design for
                     live streaming systems. To this date, although some
                     prior techniques focusing on live streaming systems
                     exist, the correlation between traffic localization and
                     streaming quality guarantee has not been well explored.
                     Additionally, the proposed solutions are often not easy
                     to apply in practice. In our presented work, we
                     demonstrate that the cross-ISP traffic of P2P live
                     streaming systems can be significantly reduced with
                     little impact on the streaming quality. First, we
                     analytically investigate and quantify the tradeoff
                     between traffic localization and streaming quality
                     guarantee, determining the lower bound of the inter-AS
                     (autonomous system) streaming rate below which
                     streaming quality cannot be preserved. Based on the
                     analysis, we further propose a practical ISP-friendly
                     solution, termed IFPS, which requires only minor
                     changes to the peer selection mechanism and can easily
                     be integrated into both new and existing systems.
                     Additionally, the significant opportunity for
                     localizing traffic is underscored by our collected
                     traces from PPLive, which also enabled us to derive
                     realistic parameters to guide our simulations. The
                     experimental results demonstrate that IFPS reduces
                     cross-ISP traffic from 81\% up to 98\% while keeping
                     streaming quality virtually unaffected.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Lou:2012:QDD,
  author          = {Xiaosong Lou and Kai Hwang},
  title           = {Quality of data delivery in peer-to-peer video
                     streaming},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {12:1--12:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089089},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {QoS in a P2P video streaming system is evaluated in
                     three stages: content generation, data delivery and
                     video playback. We use jitter-free probability as the
                     main performance metric to study Quality of Data
                     delivery (QoD). A new model that incorporates both
                     bandwidth and data availability of P2P network is
                     proposed. Our model relies on a sharing factor that
                     models data availability among all peers. We simulate
                     on a minimalistic network to demonstrate how to apply
                     the analytical model to design a P2P video streaming
                     system with a very low jitter rate. Our simulation
                     experimental results reveal that the lower bound on
                     jitter-free probability is indeed effective to reflect
                     the QoD of the entire system. Our model captures the
                     impact of many design choices, including upload
                     bandwidth limit, peer selection strategies, and video
                     stream chunking schemes.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Wu:2012:DNW,
  author          = {Chuan Wu and Baochun Li and Shuqiao Zhao},
  title           = {Diagnosing network-wide {P2P} live streaming
                     inefficiencies},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {13:1--13:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089090},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Large-scale live peer-to-peer (P2P) streaming
                     applications have been successfully deployed in today's
                     Internet. While they can accommodate hundreds of
                     thousands of users simultaneously with hundreds of
                     channels of programming, there still commonly exist
                     channels and times where and when the streaming quality
                     is unsatisfactory. In this paper, based on more than
                     two terabytes and one year worth of live traces from
                     UUSee, a large-scale commercial P2P live streaming
                     system, we show an in-depth network-wide diagnosis of
                     streaming inefficiencies, commonly present in typical
                     mesh-based P2P live streaming systems. As the first
                     highlight of our work, we identify an evolutionary
                     pattern of low streaming quality in the system, and the
                     distribution of streaming inefficiencies across various
                     streaming channels and in different geographical
                     regions. We then carry out an extensive investigation
                     to explore the causes to such streaming inefficiencies
                     over different times and across different
                     channels/regions at specific times, by investigating
                     the impact of factors such as the number of peers, peer
                     upload bandwidth, inter-peer bandwidth availability,
                     server bandwidth consumption, and many more. The
                     original discoveries we have brought forward include
                     the two-sided effects of peer population on the
                     streaming quality in a streaming channel, the
                     significant impact of inter-peer bandwidth bottlenecks
                     at peak times, and the inefficient utilization of
                     server capacities across concurrent channels. Based on
                     these insights, we identify problems within the
                     existing P2P live streaming design and discuss a number
                     of suggestions to improve real-world streaming
                     protocols operating at a large scale.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Wu:2012:ABP,
  author          = {Chuan Wu and Zongpeng Li and Xuanjia Qiu and Francis
                     C. M. Lau},
  title           = {Auction-based {P2P VoD} streaming: Incentives and
                     optimal scheduling},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {14:1--14:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089091},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Real-world large-scale Peer-to-Peer (P2P)
                     Video-on-Demand (VoD) streaming applications face more
                     design challenges as compared to P2P live streaming,
                     due to higher peer dynamics and less buffer overlap.
                     The situation is further complicated when we consider
                     the selfish nature of peers, who in general wish to
                     download more and upload less, unless otherwise
                     motivated. Taking a new perspective of distributed
                     dynamic auctions, we design efficient P2P VoD streaming
                     algorithms with simultaneous consideration of peer
                     incentives and streaming optimality. In our solution,
                     media block exchanges among peers are carried out
                     through local auctions, in which budget-constrained
                     peers bid for desired blocks from their neighbors,
                     which in turn deliver blocks to the winning bidders and
                     collect revenue. With strategic design of a
                     discriminative second price auction with seller
                     reservation, a supplying peer has full incentive to
                     maximally contribute its bandwidth to increase its
                     budget; requesting peers are also motivated to bid in
                     such a way that optimal media block scheduling is
                     achieved effectively in a fully decentralized fashion.
                     Applying techniques from convex optimization and
                     mechanism design, we prove (a) the incentive
                     compatibility at the selling and buying peers, and (b)
                     the optimality of the induced media block scheduling in
                     terms of social welfare maximization. Large-scale
                     empirical studies are conducted to investigate the
                     behavior of the proposed auction mechanisms in dynamic
                     P2P VoD systems based on real-world settings.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Zhang:2012:PHL,
  author          = {Tieying Zhang and Xueqi Cheng and Jianming Lv and
                     Zhenhua Li and Weisong Shi},
  title           = {Providing hierarchical lookup service for {P2P--VoD}
                     systems},
  journal         = j-TOMCCAP,
  volume          = {8s},
  number          = {1},
  pages           = {15:1--15:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2089085.2089092},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Fri Mar 16 15:56:04 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Supporting random jump in P2P-VoD systems requires
                     efficient lookup for the `best' suppliers, where `best'
                     means the suppliers should meet two requirements:
                     content match and network quality match. Most studies
                     use a DHT-based method to provide content lookup;
                     however, these methods are neither able to meet the
                     network quality requirements nor suitable for VoD
                     streaming due to the large overhead. In this paper, we
                     propose Mediacoop, a novel hierarchical lookup scheme
                     combining both content and quality match to provide
                     random jumps for P2P-VoD systems. It exploits the play
                     position to efficiently locate the candidate suppliers
                     with required data (content match), and performs
                     refined lookup within the candidates to meet quality
                     match. Theoretical analysis and simulation results show
                     that Mediacoop is able to achieve lower jump latency
                     and control overhead than the typical DHT-based method.
                     Moreover, we implement Mediacoop in a BitTorrent-like
                     P2P-VoD system called CoolFish and make optimizations
                     for such `total cache' applications. The
                     implementation and evaluation in CoolFish show that
                     Mediacoop is able to improve user experiences,
                     especially the jump latency, which verifies the
                     practicability of our design.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Anonymous:2012:TCO,
  author          = {Anonymous},
  title           = {Table of Contents: Online Supplement Volume {8S},
                     Number 1},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {16:1--16:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169004},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Dornaika:2012:IRF,
  author          = {Fadi Dornaika and James H. Elder},
  title           = {Image registration for foveated panoramic sensing},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {17:1--17:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2168997},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article addresses the problem of registering
                     high-resolution, small field-of-view images with
                     low-resolution panoramic images provided by a panoramic
                     catadioptric video sensor. Such systems may find
                     application in surveillance and telepresence systems
                     that require a large field of view and high resolution
                     at selected locations. Although image registration has
                     been studied in more conventional applications, the
                     problem of registering panoramic and conventional video
                     has not previously been addressed, and this problem
                     presents unique challenges due to (i) the extreme
                     differences in resolution between the sensors (more
                     than a 16:1 linear resolution ratio in our
                     application), and (ii) the resolution inhomogeneity of
                     panoramic images. The main contributions of this
                     article are as follows. First, we introduce our
                     foveated panoramic sensor design. Second, we show how a
                     coarse registration can be computed from the raw images
                     using parametric template matching techniques. Third,
                     we propose two refinement methods allowing automatic
                     and near real-time registration between the two image
                     streams. The first registration method is based on
                     matching extracted interest points using a closed form
                     method. The second registration method is featureless
                     and based on minimizing the intensity discrepancy
                     allowing the direct recovery of both the geometric and
                     the photometric transforms. Fourth, a comparison
                     between the two registration methods is carried out,
                     which shows that the featureless method is superior in
                     accuracy. Registration examples using the developed
                     methods are presented.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Zhang:2012:CPC,
  author          = {Xin Zhang and Tom{\'a}s Ward and S{\'e}amus McLoone},
  title           = {Comparison of predictive contract mechanisms from an
                     information theory perspective},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {18:1--18:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2168998},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Inconsistency arises across a Distributed Virtual
                     Environment due to network latency induced by state
                     changes communications. Predictive Contract Mechanisms
                     (PCMs) combat this problem through reducing the amount
                     of messages transmitted in return for perceptually
                     tolerable inconsistency. To date there are no methods
                     to quantify the efficiency of PCMs in communicating
                     this reduced state information. This article presents
                     an approach derived from concepts in information theory
                     for a deeper understanding of PCMs. Through a
                     comparison of representative PCMs, the worked analysis
                     illustrates interesting aspects of PCMs operation and
                     demonstrates how they can be interpreted as a form of
                     lossy information compression.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Olsen:2012:ITN,
  author          = {Dan R. Olsen and Derek Bunn and Trent Boulter and
                     Robert Walz},
  title           = {Interactive television news},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {19:1--19:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2168999},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {A new interactive television experience has been
                     created for watching television news. The goal is to
                     create a news experience that is similar to the way
                     people watch television in their living rooms while
                     giving viewers the power to make choices about what
                     they see. We partnered with existing news organizations
                     to create tools consistent with current news production
                     practices. The viewer experience allows selection of
                     the order of news content, skipping unwanted content
                     and exploring stories in more depth. These tools were
                     used to produce seven days of interactive commercial
                     news that were viewed in ten homes.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Armitage:2012:ROF,
  author          = {Grenville Armitage and Amiel Heyde},
  title           = {{REED}: {Optimizing} first person shooter game server
                     discovery using network coordinates},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {20:1--20:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169000},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Online First Person Shooter (FPS) games typically use
                     a client-server communication model, with thousands of
                     enthusiast-hosted game servers active at any time.
                     Traditional FPS server discovery may take minutes, as
                     clients create thousands of short-lived packet flows
                     while probing all available servers to find a selection
                     of game servers with tolerable round trip time (RTT).
                     REED reduces a client's probing time and network
                     traffic to 1\% of traditional server discovery. REED
                     game servers participate in a centralized, incremental
                     calculation of their network coordinates, and clients
                     use these coordinates to expedite the discovery of
                     servers with low RTTs.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Liu:2012:ILC,
  author          = {Xiaobai Liu and Shuicheng Yan and Tat-Seng Chua and
                     Hai Jin},
  title           = {Image label completion by pursuing contextual
                     decomposability},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {21:1--21:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169001},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {This article investigates how to automatically
                     complete the missing labels for the partially annotated
                     images, without image segmentation. The label
                     completion procedure is formulated as a nonnegative
                     data factorization problem, to decompose the global
                     image representations that are used for describing the
                     entire images, for instance, various image feature
                     descriptors, into their corresponding label
                     representations, that are used for describing the local
                     semantic regions within images. The solution provided
                     in this work is motivated by following observations.
                     First, label representations of the regions with the
                     same label often share certain commonness, yet may be
                     essentially different due to the large intraclass
                     variations. Thus, each label or concept should be
                     represented by using a subspace spanned by an ensemble
                     of basis, instead of a single one, to characterize the
                     intralabel diversities. Second, the subspaces for
                     different labels are different from each other. Third,
                     while two images are similar with each other, the
                     corresponding label representations should be similar.
                     We formulate this cross-image context as well as the
                     given partial label annotations in the framework of
                     nonnegative data factorization and then propose an
                     efficient multiplicative nonnegative update rules to
                     alternately optimize the subspaces and the
                     reconstruction coefficients. We also provide the
                     theoretic proof of algorithmic convergence and
                     correctness. Extensive experiments over several
                     challenging image datasets clearly demonstrate the
                     effectiveness of our proposed solution in boosting the
                     quality of image label completion and image annotation
                     accuracy. Based on the same formulation, we further
                     develop a label ranking algorithms, to refine the
                     noised image labels without any manual supervision. We
                     compare the proposed label ranking algorithm with the
                     state-of-the-arts over the popular evaluation databases
                     and achieve encouragingly improvements.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Chen:2012:SGU,
  author          = {Yi Chen and Abhidnya A. Deshpande and Ramazan S.
                     Ayg{\"u}n},
  title           = {Sprite generation using sprite fusion},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {22:1--22:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169002},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {There has been related research for sprite or mosaic
                     generation for over 15 years. In this article, we try
                     to understand the methodologies for sprite generation
                     and identify what has not actually been covered for
                     sprite generation. We first identify issues and focus
                     on the domain of videos for sprite generation. We
                     introduce a novel sprite fusion method that blends two
                     sprites. Sprite fusion method produces good results for
                     tracking videos and does not require object
                     segmentation. We present sample results of our
                     experiments.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Weng:2012:CVR,
  author          = {Ming-Fang Weng and Yung-Yu Chuang},
  title           = {Collaborative video reindexing via matrix
                     factorization},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2},
  pages           = {23:1--23:??},
  month           = may,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2168996.2169003},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:03 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Concept-based video indexing generates a matrix of
                     scores predicting the possibilities of concepts
                     occurring in video shots. Based on the idea of
                     collaborative filtering, this article presents
                     unsupervised methods to refine the initial scores
                     generated by concept classifiers by taking into account
                     the concept-to-concept correlation and shot-to-shot
                     similarity embedded within the score matrix. Given a
                     noisy matrix, we refine the inaccurate scores via
                     matrix factorization. This method is further improved
                     by learning multiple local models and incorporating
                     contextual-temporal structures. Experiments on the
                     TRECVID 2006--2008 datasets demonstrate relative
                     performance gains ranging from 13\% to 52\% without
                     using any user annotations or external knowledge
                     resources.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Kankanhalli:2012:ISI,
  author          = {Mohan S. Kankanhalli},
  title           = {Introduction to special issue on multimedia security},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {31:1--31:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344437},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Weir:2012:IHV,
  author          = {Jonathan Weir and Weiqi Yan and Mohan S. Kankanhalli},
  title           = {Image hatching for visual cryptography},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {32:1--32:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344438},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Image hatching (or nonphotorealistic line-art) is a
                     technique widely used in the printing or engraving of
                     currency. Diverse styles of brush strokes have
                     previously been adopted for different areas of an image
                     to create aesthetically pleasing textures and shading.
                     Because there is no continuous tone within these types
                     of images, a multilevel scheme is proposed, which uses
                     different textures based on a threshold level. These
                     textures are then applied to the different levels and
                     are then combined to build up the final hatched image.
                     The proposed technique allows a secret to be hidden
                     using Visual Cryptography (VC) within the hatched
                     images. Visual cryptography provides a very powerful
                     means by which one secret can be distributed into two
                     or more pieces known as shares. When the shares are
                     superimposed exactly together, the original secret can
                     be recovered without computation. Also provided is a
                     comparison between the original grayscale images and
                     the resulting hatched images that are generated by the
                     proposed algorithm. This reinforces that the overall
                     quality of the hatched scheme is sufficient. The
                     Structural SIMilarity index (SSIM) is used to perform
                     this comparison.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Li:2012:RIB,
  author          = {Jian Li and Hongmei Liu and Jiwu Huang and Yun Q.
                     Shi},
  title           = {Reference index-based {H.264} video watermarking
                     scheme},
  journal         = j-TOMCCAP,
  volume          = {8},
  number          = {2S},
  pages           = {33:1--33:??},
  month           = sep,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2344436.2344439},
  ISSN            = {1551-6857 (print), 1551-6865 (electronic)},
  ISSN-L          = {1551-6857},
  bibdate         = {Tue Nov 6 18:13:05 MST 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tomccap/;
                     https://www.math.utah.edu/pub/tex/bib/tomccap.bib},
  abstract        = {Video watermarking has received much attention over
                     the past years as a promising solution to copy
                     protection. Watermark robustness is still a key issue
                     of research, especially when a watermark is embedded in
                     the compressed video domain. In this article, a robust
                     watermarking scheme for H.264 video is proposed. During
                     video encoding, the watermark is embedded in the index
                     of the reference frame, referred to as reference index,
                     a bitstream syntax element newly proposed in the H.264
                     standard. Furthermore, the video content (current coded
                     blocks) is modified based on an optimization model,
                     aiming at improving watermark robustness without
                     unacceptably degrading the video's visual quality or
                     increasing the video's bit rate. Compared with the
                     existing schemes, our method has the following three
                     advantages: (1) The bit rate of the watermarked video
                     is adjustable; (2) the robustness against common video
                     operations can be achieved; (3) the watermark embedding
                     and extraction are simple. Extensive experiments have
                     verified the good performance of the proposed
                     watermarking scheme.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Multimed Comput. Commun. Appl.},
  articleno       = {33},
  fjournal        = {ACM Transactions on Multimedia Computing,
                     Communications, and Applications},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J961},
}
@Article{Gao:2012:RHC,
  author =       "Xifeng Gao and Caiming Zhang and Yan Huang and Zhigang
                 Deng",
  title =        "A robust high-capacity affine-transformation-invariant
                 scheme for watermarking {$3$D} geometric models",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "34:1--34:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344440",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this article we propose a novel, robust, and
                 high-capacity watermarking method for 3D meshes with
                 arbitrary connectivities in the spatial domain based on
                 affine invariants. Given a 3D mesh model, a watermark
                 is embedded as affine-invariant length ratios of one
                 diagonal segment to the residing diagonal intersected
                 by the other one in a coplanar convex quadrilateral. In
                 the extraction process, a watermark is recovered by
                 combining all the watermark pieces embedded in length
                 ratios through majority voting. Extensive experimental
                 results demonstrate the robustness, high computational
                 efficiency, high capacity, and
                 affine-transformation-invariant characteristics of the
                 proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2012:EMA,
  author =       "Rui Yang and Zhenhua Qu and Jiwu Huang",
  title =        "Exposing {MP3} audio forgeries using frame offsets",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "35:1--35:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344441",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Audio recordings should be authenticated before they
                 are used as evidence. Although audio watermarking and
                 signature are widely applied for authentication, these
                 two techniques require accessing the original audio
                 before it is published. Passive authentication is
                 necessary for digital audio, especially for the most
                 popular audio format: MP3. In this article, we propose
                 a passive approach to detect forgeries of MP3 audio.
                 During the process of MP3 encoding the audio samples
                 are divided into frames, and thus each frame has its
                 own frame offset after encoding. Forgeries lead to the
                 breaking of framing grids. So the frame offset is a
                 good indication for locating forgeries, and it can be
                 retrieved by the identification of the quantization
                 characteristic. In this way, the doctored positions can
                 be automatically located. Experimental results
                 demonstrate that the proposed approach is effective in
                 detecting some common forgeries, such as deletion,
                 insertion, substitution, and splicing. Even when the
                 bit rate is as low as 32 kbps, the detection rate is
                 above 99\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Feng:2012:CAO,
  author =       "Hui Feng and Hefei Ling and Fuhao Zou and Weiqi Yan
                 and Zhengding Lu",
  title =        "A collusion attack optimization strategy for digital
                 fingerprinting",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "36:1--36:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344442",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Collusion attack is a cost-efficient attack for
                 digital fingerprinting. In this article, we propose a
                 novel collusion attack strategy, Iterative Optimization
                 Collusion Attack (IOCA), which is based upon the
                 gradient attack and the principle of informed watermark
                 embedding. We evaluate the performance of the proposed
                 collusion attack strategy in defeating four typical
                 fingerprinting schemes under a well-constructed
                 evaluation framework. The simulation results show that
                 the proposed strategy performs more effectively than
                 the gradient attack, and adopting no more than three
                 fingerprinted copies can sufficiently collapse examined
                 fingerprinting schemes. Meanwhile, the content resulted
                 from the proposed attack still preserves high
                 perceptual quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Sachan:2012:ALV,
  author =       "Amit Sachan and Sabu Emmanuel and Mohan S.
                 Kankanhalli",
  title =        "Aggregate licenses validation for digital rights
                 violation detection",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "2S",
  pages =        "37:1--37:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2344436.2344443",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:05 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Digital Rights Management (DRM) is the term associated
                 with the set of technologies to prevent illegal
                 multimedia content distribution and consumption. DRM
                 systems generally involve multiple parties such as
                 owner, distributors, and consumers. The owner issues
                 redistribution licenses to its distributors. The
                 distributors in turn using their received
                 redistribution licenses can generate and issue new
                 redistribution licenses to other distributors and new
                 usage licenses to consumers. As a part of rights
                 violation detection, these newly generated licenses
                 must be validated by a validation authority against the
                 redistribution license used to generate them. The
                 validation of these newly generated licenses becomes
                 quite complex when there exist multiple redistribution
                 licenses for a media with the distributors. In such
                 cases, the validation process requires validation using
                 an exponential number (to the number of redistribution
                 licenses) of validation inequalities and each
                 validation inequality may contain up to an exponential
                 number of summation terms. This makes the validation
                 process computationally intensive and necessitates to
                 do the validation efficiently. To overcome this, we
                 propose validation tree, a prefix-tree-based validation
                 method to do the validation efficiently. Theoretical
                 analysis and experimental results show that our
                 proposed technique reduces the validation time
                 significantly.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
%%% The last author's given name is the Norwegian P{\aa}l (a-ring), not
%%% a-circumflex; this is the spelling already used in Gatica-Perez:2012:ISS.
@Article{Riiser:2012:VSU,
  author =       "Haakon Riiser and Tore Endestad and Paul Vigmostad and
                 Carsten Griwodz and P{\aa}l Halvorsen",
  title =        "Video streaming using a location-based
                 bandwidth-lookup service for bitrate planning",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240137",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A lot of people around the world commute using public
                 transportation and would like to spend this time
                 viewing streamed video content such as news or sports
                 updates. However, mobile wireless networks typically
                 suffer from severe bandwidth fluctuations, and the
                 networks are often completely unresponsive for several
                 seconds, sometimes minutes. Today, there are several
                 ways of adapting the video bitrate and thus the video
                 quality to such fluctuations, for example, using
                 scalable video codecs or segmented adaptive HTTP
                 streaming that switches between nonscalable video
                 streams encoded in different bitrates. Still, for a
                 better long-term video playout experience that avoids
                 disruptions and frequent quality changes while using
                 existing video adaptation technology, it is desirable
                 to perform bandwidth prediction and planned quality
                 adaptation. This article describes a video streaming
                 system for receivers equipped with a GPS. A receiver's
                 download rate is constantly monitored, and periodically
                 reported back to a central database along with
                 associated GPS positional data. Thus, based on the
                 current location, a streaming device can use a
                 GPS-based bandwidth-lookup service in order to better
                 predict the near-future bandwidth availability and
                 create a schedule for the video playout that takes
                 likely future availability into account. To create a
                 prototype and perform initial tests, we conducted
                 several field trials while commuting using public
                 transportation. We show how our database has been used
                 to predict bandwidth fluctuations and network outages,
                 and how this information helps maintain uninterrupted
                 playback with less compromise on video quality than
                 possible without prediction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Valdes:2012:AEV,
  author =       "Victor Valdes and Jose M. Martinez",
  title =        "Automatic evaluation of video summaries",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240138",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article describes a method for the automatic
                 evaluation of video summaries based on the training of
                 individual predictors for different quality measures
                 from the TRECVid 2008 BBC Rushes Summarization Task.
                 The obtained results demonstrate that, with a large set
                 of evaluation data, it is possible to train fully
                 automatic evaluation systems based on visual features
                 automatically extracted from the summaries. The
                 proposed approach will enable faster and easier
                 estimation of the results of newly developed
                 abstraction algorithms and the study of which summary
                 characteristics influence their perceived quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Tian:2012:STL,
  author =       "Xinmei Tian and Dacheng Tao and Yong Rui",
  title =        "Sparse transfer learning for interactive video search
                 reranking",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240139",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Visual reranking is effective to improve the
                 performance of the text-based video search. However,
                 existing reranking algorithms can only achieve limited
                 improvement because of the well-known semantic gap
                 between low-level visual features and high-level
                 semantic concepts. In this article, we adopt
                 interactive video search reranking to bridge the
                 semantic gap by introducing user's labeling effort. We
                 propose a novel dimension reduction tool, termed sparse
                 transfer learning (STL), to effectively and efficiently
                 encode user's labeling information. STL is particularly
                 designed for interactive video search reranking.
                 Technically, it (a) considers the pair-wise
                 discriminative information to maximally separate
                 labeled query relevant samples from labeled query
                 irrelevant ones, (b) achieves a sparse representation
                 for the subspace to encodes user's intention by
                 applying the elastic net penalty, and (c) propagates
                 user's labeling information from labeled samples to
                 unlabeled samples by using the data distribution
                 knowledge. We conducted extensive experiments on the
                 TRECVID 2005, 2006 and 2007 benchmark datasets and
                 compared STL with popular dimension reduction
                 algorithms. We report superior performance by using the
                 proposed STL-based interactive video search
                 reranking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
%%% Third author's surname is capitalized McLoone (the form the author
%%% publishes under); BibTeX styles do not recase names, so it must be
%%% stored correctly here.
@Article{Zhang:2012:IBD,
  author =       "Xin Zhang and Tom{\'a}s E. Ward and S{\'e}amus
                 McLoone",
  title =        "An information-based dynamic extrapolation model for
                 networked virtual environments",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240140",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Various Information Management techniques have been
                 developed to help maintain a consistent shared virtual
                 world in a Networked Virtual Environment. However, such
                 techniques have to be carefully adapted to the
                 application state dynamics and the underlying network.
                 This work presents a novel framework that minimizes
                 inconsistency by optimizing bandwidth usage to deliver
                 useful information. This framework measures the state
                 evolution using an information model and dynamically
                 switches extrapolation models and the packet rate to
                 make the most information-efficient usage of the
                 available bandwidth. The results shown demonstrate that
                 this approach can help optimize consistency under
                 constrained and time-varying network conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2012:UCM,
  author =       "Linjun Yang and Bo Geng and Alan Hanjalic and
                 Xian-Sheng Hua",
  title =        "A unified context model for web image retrieval",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240141",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Content-based web image retrieval based on the
                 query-by-example (QBE) principle remains a challenging
                 problem due to the semantic gap as well as the gap
                 between a user's intent and the representativeness of a
                 typical image query. In this article, we propose to
                 address this problem by integrating query-related
                 contextual information into an advanced query model to
                 improve the performance of QBE-based web image
                 retrieval. We consider both the local and global
                 context of the query image. The local context can be
                 inferred from the web pages and the click-through log
                 associated with the query image, while the global
                 context is derived from the entire corpus comprising
                 all web images and the associated web pages. To
                 effectively incorporate the local query context we
                 propose a language modeling based approach to deal with
                 the combined structured query representation from the
                 contextual and visual information. The global query
                 context is integrated by the multi-modal relevance
                 model to ``reconstruct'' the query from the document
                 models indexed in the corpus. In this way, the global
                 query context is employed to address the noise or
                 missing information in the query and its local context,
                 so that a comprehensive and robust query model can be
                 obtained. We evaluated the proposed approach on a
                 representative product image dataset collected from the
                 web and demonstrated that the inclusion of the local
                 and global query contexts significantly improves the
                 performance of QBE-based web image retrieval.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Patras:2012:CTS,
  author =       "Paul Patras and Albert Banchs and Pablo Serrano",
  title =        "A control theoretic scheme for efficient video
                 transmission over {IEEE 802.11e EDCA WLANs}",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240142",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The EDCA mechanism of the IEEE 802.11 standard has
                 been designed to support, among others, video traffic.
                 This mechanism relies on a number of parameters whose
                 configuration is left open by the standard. Although
                 there are some recommended values for these parameters,
                 they are fixed independent of the WLAN conditions,
                 which results in suboptimal performance. Following this
                 observation, a number of approaches in the literature
                 have been devised to set the EDCA parameters based on
                 an estimation of the WLAN conditions. However, these
                 previous approaches are based on heuristics and hence
                 do not guarantee optimized performance. In this article
                 we propose a novel algorithm to adjust the EDCA
                 parameters to carry video traffic which, in contrast to
                 previous approaches, is sustained on mathematical
                 foundations that guarantee optimal performance. In
                 particular, our approach builds upon (i) an analytical
                 model of the WLAN performance under video traffic, used
                 to derive the optimal point of operation of EDCA, and
                 (ii) a control theoretic designed mechanism which
                 drives the WLAN to this point of operation. Via
                 extensive simulations, we show that the proposed
                 approach performs optimally and substantially
                 outperforms the standard recommended configuration as
                 well as previous adaptive proposals.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhu:2012:JLS,
  author =       "Xinglei Zhu and Chang W. Chen",
  title =        "A joint layered scheme for reliable and secure mobile
                 {JPEG-2000} streaming",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2240136.2240143",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:06 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel joint layered approach
                 to simultaneously achieve both reliable and secure
                 mobile JPEG-2000 image streaming. With a priori
                 knowledge of JPEG-2000 source coding and channel
                 coding, the proposed joint system integrates
                 authentication into the media error protection
                 components to ensure that every source-decodable media
                 unit is authenticated. By such a dedicated design, the
                 proposed scheme protects both compressed JPEG-2000
                 codestream and the authentication data from wireless
                 channel impairments. It is fundamentally different from
                 many existing systems that consider the problem of
                 media authentication separately from the other
                 operations in the media transmission system. By
                 utilizing the contextual relationship, such as coding
                 dependency and content importance between media slices
                 for authentication hash appending, the proposed scheme
                 generates an extremely low authentication overhead.
                 Under this joint layered coding framework, an optimal
                 rate allocation algorithm for source coding, channel
                 coding, and media authentication is developed to
                 guarantee end-to-end media quality. Experiment results
                 on JPEG-2000 images validate the proposed scheme and
                 demonstrate that the performance of the proposed scheme
                 is approaching its upper bound, in which case no
                 authentication is applied to the media stream.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gatica-Perez:2012:ISS,
  author =       "Daniel Gatica-Perez and Gang Hua and Wei Tsang Ooi and
                 P{\aa}l Halvorsen",
  title =        "Introduction to the special section of best papers of
                 {ACM Multimedia 2011}",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "38:1--38:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348817",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wu:2012:CPA,
  author =       "Wanmin Wu and Ahsan Arefin and Gregorij Kurillo and
                 Pooja Agarwal and Klara Nahrstedt and Ruzena Bajcsy",
  title =        "{CZLoD}: a psychophysical approach for {$3$D}
                 tele-immersive video",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "39:1--39:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348818",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a psychophysical study that
                 measures the perceptual thresholds of a new factor
                 called Color-plus-Depth Level-of-Details (CZLoD)
                 peculiar to polygon-based 3D tele-immersive video. The
                 results demonstrate the existence of Just Noticeable
                 Degradation and Just Unacceptable Degradation
                 thresholds on the factor. In light of the results, we
                 design and implement a real-time perception-based
                 quality adaptor for 3D tele-immersive video. Our
                 experimental results show that the adaptation scheme
                 can reduce resource usage (e.g., CPU cycles) while
                 considerably enhancing the overall perceived visual
                 quality. Our analysis confirms the potential temporal
                 and spatial performance benefits achievable with CZLoD
                 adaptation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ji:2012:AQS,
  author =       "Rongrong Ji and Felix X. Yu and Tongtao Zhang and
                 Shih-Fu Chang",
  title =        "Active query sensing: {Suggesting} the best query view
                 for mobile visual search",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "40:1--40:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348819",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "While much exciting progress is being made in mobile
                 visual search, one important question has been left
                 unexplored in all current systems. When searching
                 objects or scenes in the 3D world, which viewing angle
                 is more likely to be successful? More particularly, if
                 the first query fails to find the right target, how
                 should the user control the mobile camera to form the
                 second query? In this article, we propose a novel
                 Active Query Sensing system for mobile location search,
                 which actively suggests the best subsequent query view
                 to recognize the physical location in the mobile
                 environment. The proposed system includes two unique
                 components: (1) an offline process for analyzing the
                 saliencies of different views associated with each
                 geographical location, which predicts the location
                 search precisions of individual views by modeling their
                 self-retrieval score distributions. (2) an online
                 process for estimating the view of an unseen query, and
                 suggesting the best subsequent view change.
                 Specifically, the optimal viewing angle change for the
                 next query can be formulated as an online information
                 theoretic approach. Using a scalable visual search
                 system implemented over a NYC street view dataset (0.3
                 million images), we show a performance gain by reducing
                 the failure rate of mobile location search to only 12\%
                 after the second query. We have also implemented an
                 end-to-end functional system, including user interfaces
                 on iPhones, client-server communication, and a remote
                 search server. This work may open up an exciting new
                 direction for developing interactive mobile media
                 applications through the innovative exploitation of
                 active sensing and query formulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Shirmohammadi:2012:ISS,
  author =       "Shervin Shirmohammadi and Mohamed Hefeeda and Wei
                 Tsang Ooi and Romulus Grigoras",
  title =        "Introduction to special section on {$3$D} mobile
                 multimedia",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "41:1--41:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348820",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2012:QOV,
  author =       "Yanwei Liu and Song Ci and Hui Tang and Yun Ye and
                 Jinxia Liu",
  title =        "{QoE}-oriented {$3$D} video transcoding for mobile
                 streaming",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "42:1--42:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348821",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With advance in mobile 3D display, mobile 3D video is
                 already enabled by the wireless multimedia networking,
                 and it will be gradually popular since it can make
                 people enjoy the natural 3D experience anywhere and
                 anytime. In current stage, mobile 3D video is generally
                 delivered over the heterogeneous network combined by
                 wired and wireless channels. How to guarantee the
                 optimal 3D visual quality of experience (QoE) for the
                 mobile 3D video streaming is one of the important
                 topics concerned by the service provider. In this
                 article, we propose a QoE-oriented transcoding approach
                 to enhance the quality of mobile 3D video service. By
                 learning the pre-controlled QoE patterns of 3D
                 contents, the proposed 3D visual QoE inferring model
                 can be utilized to regulate the transcoding
                 configurations in real-time according to the feedbacks
                 of network and user-end device information. In the
                 learning stage, we propose a piecewise linear mean
                 opinion score (MOS) interpolation method to further
                 reduce the cumbersome manual work of preparing QoE
                 patterns. Experimental results show that the proposed
                 transcoding approach can provide the adapted 3D stream
                 to the heterogeneous network, and further provide
                 superior QoE performance to the fixed quantization
                 parameter (QP) transcoding and mean squared error (MSE)
                 optimized transcoding for mobile 3D video streaming.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2012:NVT,
  author =       "Shujie Liu and Chang Wen Chen",
  title =        "A novel {$3$D} video transcoding scheme for adaptive
                 {$3$D} video transmission to heterogeneous terminals",
  journal =      j-TOMCCAP,
  volume =       "8",
  number =       "3s",
  pages =        "43:1--43:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348816.2348822",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Nov 6 18:13:07 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Three-dimensional video (3DV) is attracting many
                 interests with its enhanced viewing experience and more
                 user driven features. 3DV has several unique
                 characteristics different from 2D video: (1) It has a
                 much larger amount of data captured and compressed, and
                 corresponding video compression techniques can be much
                 more complicated in order to explore data redundancy.
                 This will lead to more constraints on users' network
                 access and computational capability, (2) Most users
                 only need part of the 3DV data at any given time, while
                 the users' requirements exhibit large diversity, (3)
                 Only a limited number of views are captured and
                 transmitted for 3DV. View rendering is thus necessary
                 to generate virtual views based on the received 3DV
                 data. However, many terminal devices do not have the
                 functionality to generate virtual views. To enable 3DV
                 experience for the majority of users with limited
                 capabilities, adaptive 3DV transmission is necessary to
                 extract/generate the required data content and
                 represent it with supported formats and bitrates for
                 heterogeneous terminal devices. 3DV transcoding is an
                 emerging and effective technique to achieve desired
                 adaptive 3DV transmission. In this article, we propose
                 the first efficient 3DV transcoding scheme that can
                 obtain any desired view, either an encoded one or a
                 virtual one, and compress it with more universal
                 H.264/AVC. The key idea of the proposed scheme is to
                 appropriately utilize motion information contained in
                 the bitstream to generate candidate motion information.
                 Original information of both the desired view and
                 reference views are used to obtain this candidate
                 information and a proper motion refinement process is
                 carried out for certain blocks. Simulation results show
                 that, compared to the straightforward cascade
                 algorithm, the proposed scheme is able to output
                 compressed bitstream of the required view with
                 significantly reduced complexity while incurring
                 negligible performance loss. Such a 3DV transcoding can
                 be applied to most gateways that usually have
                 constraints on computational complexity and time
                 delay.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Roodaki:2012:NMD,
author = "Hoda Roodaki and Mahmoud Reza Hashemi and Shervin
Shirmohammadi",
title = "A new methodology to derive objective quality
assessment metrics for scalable multiview {$3$D} video
coding",
journal = j-TOMCCAP,
volume = "8",
number = "3s",
pages = "44:1--44:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2348816.2348823",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Nov 6 18:13:07 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the growing demand for 3D video, efforts are
underway to incorporate it in the next generation of
broadcast and streaming applications and standards. 3D
video is currently available in games, entertainment,
education, security, and surveillance applications. A
typical scenario for multiview 3D consists of several
3D video sequences captured simultaneously from the
same scene with the help of multiple cameras from
different positions and through different angles.
Multiview video coding provides a compact
representation of these multiple views by exploiting
the large amount of inter-view statistical
dependencies. One of the major challenges in this field
is how to transmit the large amount of data of a
multiview sequence over error prone channels to
heterogeneous mobile devices with different bandwidth,
resolution, and processing/battery power, while
maintaining a high visual quality. Scalable Multiview
3D Video Coding (SMVC) is one of the methods to address
this challenge; however, the evaluation of the overall
visual quality of the resulting scaled-down video
requires a new objective perceptual quality measure
specifically designed for scalable multiview 3D video.
Although several subjective and objective quality
assessment methods have been proposed for multiview 3D
sequences, no comparable attempt has been made for
quality assessment of scalable multiview 3D video. In
this article, we propose a new methodology to build
suitable objective quality assessment metrics for
different scalable modalities in multiview 3D video.
Our proposed methodology considers the importance of
each layer and its content as a quality of experience
factor in the overall quality. Furthermore, in addition
to the quality of each layer, the concept of disparity
between layers (inter-layer disparity) and disparity
between the units of each layer (intra-layer disparity)
is considered as an effective feature to evaluate
overall perceived quality more accurately. Simulation
results indicate that by using this methodology, more
efficient objective quality assessment metrics can be
introduced for each multiview 3D video scalable
modalities.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "44",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hamza:2012:EEM,
author = "Ahmed Hamza and Mohamed Hefeeda",
title = "Energy-efficient multicasting of multiview {$3$D}
videos to mobile devices",
journal = j-TOMCCAP,
volume = "8",
number = "3s",
pages = "45:1--45:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2348816.2348824",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Nov 6 18:13:07 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Multicasting multiple video streams over wireless
broadband access networks enables the delivery of
multimedia content to large-scale user communities in a
cost-efficient manner. Three dimensional (3D) videos
are the next natural step in the evolution of digital
media technologies. In order to provide 3D perception,
3D video streams contain one or more views that greatly
increase their bandwidth requirements. Due to the
limited channel capacity and variable bit rate of the
videos, multicasting multiple 3D videos over wireless
broadband networks is a challenging problem. In this
article, we consider a 4G wireless access network in
which a number of 3D videos represented in two-view
plus depth format and encoded using scalable video
coders are multicast. We formulate the optimal 3D video
multicasting problem to maximize the quality of
rendered virtual views on the receivers' displays. We
show that this problem is NP-complete and present a
polynomial time approximation algorithm to solve it. We
then extend the proposed algorithm to efficiently
schedule the transmission of the chosen substreams from
each video in order to maximize the power saving on the
mobile receivers. Our simulation-based experimental
results show that our algorithm provides solutions that
are within 0.3 dB of the optimal solutions while
satisfying real-time requirements of multicast systems.
In addition, our algorithm results in an average power
consumption reduction of 86\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "45",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Shi:2012:RTR,
author = "Shu Shi and Klara Nahrstedt and Roy Campbell",
title = "A real-time remote rendering system for interactive
mobile graphics",
journal = j-TOMCCAP,
volume = "8",
number = "3s",
pages = "46:1--46:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2348816.2348825",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Nov 6 18:13:07 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Mobile devices are gradually changing people's
computing behaviors. However, due to the limitations of
physical size and power consumption, they are not
capable of delivering a 3D graphics rendering
experience comparable to desktops. Many applications
with intensive graphics rendering workloads are unable
to run on mobile platforms directly. This issue can be
addressed with the idea of remote rendering: the heavy
3D graphics rendering computation runs on a powerful
server and the rendering results are transmitted to the
mobile client for display. However, the simple remote
rendering solution inevitably suffers from the large
interaction latency caused by wireless networks, and is
not acceptable for many applications that have very
strict latency requirements. In this article, we
present an advanced low-latency remote rendering system
that assists mobile devices to render interactive 3D
graphics in real-time. Our design takes advantage of an
image based rendering technique: 3D image warping, to
synthesize the mobile display from the depth images
generated on the server. The research indicates that
the system can successfully reduce the interaction
latency while maintaining the high rendering quality by
generating multiple depth images at the carefully
selected viewpoints. We study the problem of viewpoint
selection, propose a real-time reference viewpoint
prediction algorithm, and evaluate the algorithm
performance with real-device experiments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "46",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Guan:2012:EMM,
author = "Wei Guan and Suya You and Ulrich Neumann",
title = "Efficient matchings and mobile augmented reality",
journal = j-TOMCCAP,
volume = "8",
number = "3s",
pages = "47:1--47:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2348816.2348826",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Nov 6 18:13:07 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the fast-growing popularity of smart phones in
recent years, augmented reality (AR) on mobile devices
is gaining more attention and becomes more demanding
than ever before. However, the limited processors in
mobile devices are not quite promising for AR
applications that require real-time processing speed.
The challenge exists due to the fact that, while fast
features are usually not robust enough in matchings,
robust features like SIFT or SURF are not
computationally efficient. There is always a tradeoff
between robustness and efficiency and it seems that we
have to sacrifice one for the other. While this is true
for most existing features, researchers have been
working on designing new features with both robustness
and efficiency. In this article, we are not trying to
present a completely new feature. Instead, we propose
an efficient matching method for robust features. An
adaptive scoring scheme and a more distinctive
descriptor are also proposed for performance
improvements. Besides, we have developed an outdoor
augmented reality system that is based on our proposed
methods. The system demonstrates that not only it can
achieve robust matchings efficiently, it is also
capable to handle large occlusions such as passengers
and moving vehicles, which is another challenge for
many AR applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "47",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{TOMCCAP-STAFF:2012:TCO,
author = "{TOMCCAP-STAFF}",
title = "Table of contents: Online supplement volume 8, number
2s, online supplement volume 8, number 3s",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "48:1--48:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2382432",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "48",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2012:E,
author = "Ralf Steinmetz",
title = "Editorial",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "49:1--49:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379791",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "49",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2012:LRC,
author = "Xiaobai Liu and Shuicheng Yan and Bin Cheng and Jinhui
Tang and Tat-Seng Chua and Hai Jin",
title = "Label-to-region with continuity-biased bi-layer
sparsity priors",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "50:1--50:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379792",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this work, we investigate how to reassign the fully
annotated labels at image level to those contextually
derived semantic regions, namely Label-to-Region (L2R),
in a collective manner. Given a set of input images
with label annotations, the basic idea of our approach
to L2R is to first discover the patch correspondence
across images, and then propagate the common labels
shared in image pairs to these correlated patches.
Specially, our approach consists of following aspects.
First, each of the input images is encoded as a
Bag-of-Hierarchical-Patch (BOP) for capturing the rich
cues at variant scales, and the individual patches are
expressed by patch-level feature descriptors. Second,
we present a sparse representation formulation for
discovering how well an image or a semantic region can
be robustly reconstructed by all the other image
patches from the input image set. The underlying
philosophy of our formulation is that an image region
can be sparsely reconstructed with the image patches
belonging to the other images with common labels, while
the robustness in label propagation across images
requires that these selected patches come from very few
images. This preference of being sparse at both patch
and image level is named bi-layer sparsity prior.
Meanwhile, we enforce the preference of choosing
larger-size patches in reconstruction, referred to as
continuity-biased prior in this work, which may further
enhance the reliability of L2R assignment. Finally, we
harness the reconstruction coefficients to propagate
the image labels to the matched patches, and fuse the
propagation results over all patches to finalize the
L2R task. As a by-product, the proposed
continuity-biased bi-layer sparse representation
formulation can be naturally applied to perform image
annotation on new testing images. Extensive experiments
on three public image datasets clearly demonstrate the
effectiveness of our proposed framework in both L2R
assignment and image annotation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "50",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Rooij:2012:ETS,
author = "Ork de Rooij and Marcel Worring",
title = "Efficient targeted search using a focus and context
video browser",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "51:1--51:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379793",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Currently there are several interactive content-based
video retrieval techniques and systems available.
However, retrieval performance depends heavily on the
means of interaction. We argue that effective CBVR
requires efficient, specialized user interfaces. In
this article we propose guidelines for such an
interface, and we propose an effective CBVR engine: the
ForkBrowser, which builds upon the principle of focus
and context. This browser is evaluated using a
combination of user simulation and real user
evaluation. Results indicate that the ideas have merit,
and that the browser performs very well when compared
to the state-of-the-art in video retrieval.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "51",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghinea:2012:UPM,
author = "Gheorghita Ghinea and Oluwakemi Ademoye",
title = "User perception of media content association in
olfaction-enhanced multimedia",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "52:1--52:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379794",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Olfaction is an exciting challenge facing multimedia
applications. In this article we have investigated user
perception of the association between olfactory media
content and video media content in olfactory-enhanced
multimedia. Results show that the association between
scent and content has a significant impact on the
user-perceived experience of olfactory-enhanced
multimedia.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "52",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Spicer:2012:NAD,
author = "Ryan Spicer and Yu-Ru Lin and Aisling Kelliher and
Hari Sundaram",
title = "{NextSlidePlease}: Authoring and delivering agile
multimedia presentations",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "53:1--53:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379795",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Presentation support tools, such as Microsoft
PowerPoint, pose challenges both in terms of creating
linear presentations from complex data and fluidly
navigating such linear structures when presenting to
diverse audiences. NextSlidePlease is a slideware
application that addresses these challenges using a
directed graph structure approach for authoring and
delivering multimedia presentations. The application
combines novel approaches for searching and analyzing
presentation datasets, composing meaningfully
structured presentations, and efficiently delivering
material under a variety of time constraints. We
introduce and evaluate a presentation analysis
algorithm intended to simplify the process of authoring
dynamic presentations, and a time management and path
selection algorithm that assists users in prioritizing
content during the presentation process. Results from
two comparative user studies indicate that the directed
graph approach promotes the creation of hyperlinks, the
consideration of connections between content items, and
a richer understanding of the time management
consequences of including and selecting presentation
material.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "53",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Qi:2012:OBI,
author = "Heng Qi and Keqiu Li and Yanming Shen and Wenyu Qu",
title = "Object-based image retrieval with kernel on adjacency
matrix and local combined features",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "54:1--54:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379796",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In object-based image retrieval, there are two
important issues: an effective image representation
method for representing image content and an effective
image classification method for processing user
feedback to find more images containing the
user-desired object categories. In the image
representation method, the local-based representation
is the best selection for object-based image retrieval.
As a kernel-based classification method, Support Vector
Machine (SVM) has shown impressive performance on image
classification. But SVM cannot work on the local-based
representation unless there is an appropriate kernel.
To address this problem, some representative kernels
are proposed in literatures. However, these kernels
cannot work effectively in object-based image retrieval
due to ignoring the spatial context and the combination
of local features. In this article, we present Adjacent
Matrix (AM) and the Local Combined Features (LCF) to
incorporate the spatial context and the combination of
local features into the kernel. We propose the AM-LCF
feature vector to represent image content and the
AM-LCF kernel to measure the similarities between
AM-LCF feature vectors. According to the detailed
analysis, we show that the proposed kernel can overcome
the deficiencies of existing kernels. Moreover, we
evaluate the proposed kernel through experiments of
object-based image retrieval on two public image sets.
The experimental results show that the performance of
object-based image retrieval can be improved by the
proposed kernel.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "54",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2012:VPA,
author = "Guangda Li and Meng Wang and Zheng Lu and Richang Hong
and Tat-Seng Chua",
title = "In-video product annotation with {Web} information
mining",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "55:1--55:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379797",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Product annotation in videos is of great importance
for video browsing, search, and advertisement. However,
most of the existing automatic video annotation
research focuses on the annotation of high-level
concepts, such as events, scenes, and object
categories. This article presents a novel solution to
the annotation of specific products in videos by mining
information from the Web. It collects a set of
high-quality training data for each product by
simultaneously leveraging Amazon and Google image
search engine. A visual signature for each product is
then built based on the bag-of-visual-words
representation of the training images. A correlative
sparsification approach is employed to remove noisy
bins in the visual signatures. These signatures are
used to annotate video frames. We conduct experiments
on more than 1,000 videos and the results demonstrate
the feasibility and effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "55",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gopinathan:2012:ASO,
author = "Ajay Gopinathan and Zongpeng Li",
title = "Algorithms for stochastic optimization of multicast
content delivery with network coding",
journal = j-TOMCCAP,
volume = "8",
number = "4",
pages = "56:1--56:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2379790.2379798",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:21 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The usage of network resources by content providers is
commonly governed by Service-Level Agreements (SLA)
between the content provider and the network service
provider. Resource usage exceeding the limits specified
in the SLA incurs the content provider additional
charges, usually at a higher cost. Hence, the content
provider's goal is to provision adequate resources in
the SLA based on forecasts of future demand. We study
capacity purchasing strategies when the content
provider employs network coded multicast as the media
delivery mechanism, with uncertainty in its future
customer set explicitly taken into consideration. The
latter requires the content provider to make capacity
provisioning decisions based on market predictions and
historical customer usage patterns. The probabilistic
element suggests a stochastic optimization approach. We
model this problem as a two-stage stochastic
optimization problem with recourse. Such optimizations
are \#P-hard to solve directly, and we design two
approximation algorithms for them. The first is a
heuristic algorithm that exploits properties unique to
network coding, so that only polynomial-time operations
are needed. It performs well in general scenarios, but
the gap from the optimal solution is not bounded by any
constant in the worst case. This motivates our second
approach, a sampling algorithm partly inspired from the
work of Gupta et al. [2004a]. We employ techniques from
duality theory in linear optimization to prove that the
sampling algorithm provides a 3-approximation to the
stochastic multicast problem. We conduct extensive
simulations to illustrate the efficacy of both
algorithms, and show that the performance of both is
usually within 10\% of the optimal solution in
practice.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "56",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hendrikx:2013:PCG,
author = "Mark Hendrikx and Sebastiaan Meijer and Joeri {Van Der
Velden} and Alexandru Iosup",
title = "Procedural content generation for games: a survey",
journal = j-TOMCCAP,
volume = "9",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2422956.2422957",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:22 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Hundreds of millions of people play computer games
every day. For them, game content---from 3D objects to
abstract puzzles---plays a major entertainment role.
Manual labor has so far ensured that the quality and
quantity of game content matched the demands of the
playing community, but is facing new scalability
challenges due to the exponential growth over the last
decade of both the gamer population and the production
costs. Procedural Content Generation for Games (PCG-G)
may address these challenges by automating, or aiding
in, game content generation. PCG-G is difficult, since
the generator has to create the content, satisfy
constraints imposed by the artist, and return
interesting instances for gamers. Despite a large body
of research focusing on PCG-G, particularly over the
past decade, ours is the first comprehensive survey of
the field of PCG-G. We first introduce a comprehensive,
six-layered taxonomy of game content: bits, space,
systems, scenarios, design, and derived. Second, we
survey the methods used across the whole field of PCG-G
from a large research body. Third, we map PCG-G methods
to game content layers; it turns out that many of the
methods used to generate game content from one layer
can be used to generate content from another. We also
survey the use of methods in practice, that is, in
commercial or prototype games. Fourth and last, we
discuss several directions for future research in
PCG-G, which we believe deserve close attention in the
near future.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2013:IRQ,
author = "Dong Liu and Shuicheng Yan and Rong-Rong Ji and
Xian-Sheng Hua and Hong-Jiang Zhang",
title = "Image retrieval with query-adaptive hashing",
journal = j-TOMCCAP,
volume = "9",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2422956.2422958",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:22 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Hashing-based approximate nearest-neighbor search may
well realize scalable content-based image retrieval.
The existing semantic-preserving hashing methods
leverage the labeled data to learn a fixed set of
semantic-aware hash functions. However, a fixed hash
function set is unable to well encode all semantic
information simultaneously, and ignores the specific
user's search intention conveyed by the query. In this
article, we propose a query-adaptive hashing method
which is able to generate the most appropriate binary
codes for different queries. Specifically, a set of
semantic-biased discriminant projection matrices are
first learnt for each of the semantic concepts, through
which a semantic-adaptable hash function set is learnt
via a joint sparsity variable selection model. At query
time, we further use the sparsity representation
procedure to select the most appropriate hash function
subset that is informative to the semantic information
conveyed by the query. Extensive experiments over three
benchmark image datasets well demonstrate the
superiority of our proposed query-adaptive hashing
method over the state-of-the-art ones in terms of
retrieval accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zheng:2013:GSD,
author = "Yan-Tao Zheng and Shuicheng Yan and Zheng-Jun Zha and
Yiqun Li and Xiangdong Zhou and Tat-Seng Chua and
Ramesh Jain",
title = "{GPSView}: a scenic driving route planner",
journal = j-TOMCCAP,
volume = "9",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2422956.2422959",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Sun May 5 09:14:22 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "GPS devices have been widely used in automobiles to
compute navigation routes to destinations. The
generated driving route targets the minimal traveling
distance, but neglects the sightseeing experience of
the route. In this study, we propose an augmented GPS
navigation system, GPSView, to incorporate a scenic
factor into the routing. The goal of GPSView is to plan
a driving route with scenery and sightseeing qualities,
and therefore allow travelers to enjoy sightseeing on
the drive. To do so, we first build a database of
scenic roadways with vistas of landscapes and sights
along the roadside. Specifically, we adapt an
attention-based approach to exploit
community-contributed GPS-tagged photos on the Internet
to discover scenic roadways. The premise is: a
multitude of photos taken along a roadway imply that
this roadway is probably appealing and catches the
public's attention. By analyzing the geospatial
distribution of photos, the proposed approach discovers
the roadside sight spots, or Points-Of-Interest (POIs),
which have good scenic qualities and visibility to
travelers on the roadway. Finally, we formulate scenic
driving route planning as an optimization task towards
the best trade-off between sightseeing experience and
traveling distance. Testing in the northern California
area shows that the proposed system can deliver
promising results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "3",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhou:2013:SMV,
  author =       "Wengang Zhou and Houqiang Li and Yijuan Lu and Qi
                 Tian",
  title =        "{SIFT} match verification by geometric coding for
                 large-scale partial-duplicate web image search",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422960",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Most large-scale image retrieval systems are based on
                 the bag-of-visual-words model. However, the traditional
                 bag-of-visual-words model does not capture the
                 geometric context among local features in images well,
                 which plays an important role in image retrieval. In
                 order to fully explore geometric context of all visual
                 words in images, efficient global geometric
                 verification methods have been attracting lots of
                 attention. Unfortunately, current existing methods on
                 global geometric verification are either
                 computationally expensive to ensure real-time response,
                 or cannot handle rotation well. To solve the preceding
                 problems, in this article, we propose a novel geometric
                 coding algorithm, to encode the spatial context among
                 local features for large-scale partial-duplicate Web
                 image retrieval. Our geometric coding consists of
                 geometric square coding and geometric fan coding, which
                 describe the spatial relationships of SIFT features
                 into three geo-maps for global verification to remove
                 geometrically inconsistent SIFT matches. Our approach
                 is not only computationally efficient, but also
                 effective in detecting partial-duplicate images with
                 rotation, scale changes, partial-occlusion, and
                 background clutter. Experiments in partial-duplicate
                 Web image search, using two datasets with one million
                 Web images as distractors, reveal that our approach
                 outperforms the baseline bag-of-visual-words approach
                 even following a RANSAC verification in mean average
                 precision. Besides, our approach achieves comparable
                 performance to other state-of-the-art global geometric
                 verification methods, for example, spatial coding
                 scheme, but is more computationally efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Park:2013:ISL,
  author =       "Jong-Seung Park and Ramesh Jain",
  title =        "Identification of scene locations from geotagged
                 images",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422961",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Due to geotagging capabilities of consumer cameras, it
                 has become easy to capture the exact geometric location
                 where a picture is taken. However, the location is not
                 the whereabouts of the scene taken by the photographer
                 but the whereabouts of the photographer himself. To
                 determine the actual location of an object seen in a
                 photo some sophisticated and tiresome steps are
                 required on a special camera rig, which are generally
                 not available in common digital cameras. This article
                 proposes a novel method to determine the geometric
                 location corresponding to a specific image pixel. A new
                 technique of stereo triangulation is introduced to
                 compute the relative depth of a pixel position.
                 Geographical metadata embedded in images are utilized
                 to convert relative depths to absolute coordinates.
                 When a geographic database is available we can also
                 infer the semantically meaningful description of a
                 scene object from where the specified pixel is
                 projected onto the photo. Experimental results
                 demonstrate the effectiveness of the proposed approach
                 in accurately identifying actual locations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2013:RAA,
  author =       "Yichuan Wang and Ting-An Lin and Cheng-Hsin Hsu and
                 Xin Liu",
  title =        "Region- and action-aware virtual world clients",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422962",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We propose region- and action-aware virtual world
                 clients. To develop such clients, we present a
                 parameterized network traffic model, based on a large
                 collection of Second Life traces gathered by us. Our
                 methodology is also applicable to virtual worlds other
                 than Second Life. With the traffic model, various
                 optimization criteria can be adopted, including visual
                 quality, response time, and energy consumption. We use
                 energy consumption as the show case, and demonstrate
                 via trace-driven simulations that, compared to two
                 existing schemes, a mobile client can save up to 36\%
                 and 41\% communication energy by selectively turning on
                 its WiFi network interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Khodabakhshi:2013:SSF,
  author =       "Naghmeh Khodabakhshi and Mohamed Hefeeda",
  title =        "{Spider}: a system for finding {$3$D} video copies",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422963",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a novel content-based copy
                 detection system for 3D videos. The system creates
                 compact and robust depth and visual signatures from the
                 3D videos. Then, signature of a query video is compared
                 against an indexed database of reference videos'
                 signatures. The system returns a score, using both
                 spatial and temporal characteristics of videos,
                 indicating whether the query video matches any video in
                 the reference video database, and in case of matching,
                 which portion of the reference video matches the query
                 video. Analysis shows that the system is efficient,
                 both computationally and storage-wise. The system can
                 be used, for example, by video content owners, video
                 hosting sites, and third-party companies to find
                 illegally copied 3D videos. We implemented Spider, a
                 complete realization of the proposed system, and
                 conducted rigorous experiments on it. Our experimental
                 results show that the proposed system can achieve high
                 accuracy in terms of precision and recall even if the
                 3D videos are subjected to several transformations at
                 the same time. For example, the proposed system yields
                 100\% precision and recall when copied videos are parts
                 of original videos, and more than 90\% precision and
                 recall when copied videos are subjected to different
                 individual transformations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Abrams:2013:WAG,
  author =       "Austin Abrams and Robert Pless",
  title =        "{Web}-accessible geographic integration and
                 calibration of webcams",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2422956.2422964",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Sun May 5 09:14:22 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A global network of webcams offers unique viewpoints
                 from tens of thousands of locations. Understanding the
                 geographic context of this imagery is vital in using
                 these cameras for quantitative environmental monitoring
                 or surveillance applications. We derive robust
                 geo-calibration constraints that allow users to
                 geo-register static or pan-tilt-zoom cameras by
                 specifying a few corresponding points, and describe our
                 Web interface suitable for novices. We discuss design
                 decisions that support our scalable, publicly
                 accessible Web service that allows webcam textures to
                 be displayed live on 3D geographic models. Finally, we
                 demonstrate several multimedia applications for
                 geo-calibrated cameras.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2013:EN,
  author =       "Ralf Steinmetz",
  title =        "Editorial note",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "31:1--31:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523001.2523002",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Nahrstedt:2013:ISS,
  author =       "Klara Nahrstedt and Rainer Lienhart and Malcolm
                 Slaney",
  title =        "Introduction to the special section on the 20th
                 anniversary of the {ACM International Conference on
                 Multimedia}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "32:1--32:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523001.2523003",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2013:TDI,
  author =       "Baochun Li and Zhi Wang and Jiangchuan Liu and Wenwu
                 Zhu",
  title =        "Two decades of {Internet} video streaming: a
                 retrospective view",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "33:1--33:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2505805",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "For over two decades, video streaming over the
                 Internet has received a substantial amount of attention
                 from both academia and industry. Starting from the
                 design of transport protocols for streaming video,
                 research interests have later shifted to the
                 peer-to-peer paradigm of designing streaming protocols
                 at the application layer. More recent research has
                 focused on building more practical and scalable
                 systems, using Dynamic Adaptive Streaming over HTTP. In
                 this article, we provide a retrospective view of the
                 research results over the past two decades, with a
                 focus on peer-to-peer streaming protocols and the
                 effects of cloud computing and social media.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Huang:2013:ETM,
  author =       "Zixia Huang and Klara Nahrstedt and Ralf Steinmetz",
  title =        "Evolution of temporal multimedia synchronization
                 principles: a historical viewpoint",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "34:1--34:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490821",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The evolution of multimedia applications has
                 drastically changed human life and behaviors. New
                 communication technologies lead to new requirements for
                 multimedia synchronization. This article presents a
                 historical view of temporal synchronization studies
                 focusing on continuous multimedia. We demonstrate how
                 the development of multimedia systems has created new
                 challenges for synchronization technologies. We
                 conclude with a new application-dependent,
                 multilocation, multirequirement synchronization
                 framework to address these new challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bulterman:2013:SAM,
  author =       "Dick C. A. Bulterman and Pablo Cesar and Rodrigo
                 Laiola Guimar{\~a}es",
  title =        "Socially-aware multimedia authoring: {Past}, present,
                 and future",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "35:1--35:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2491893",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Creating compelling multimedia productions is a
                 nontrivial task. This is as true for creating
                 professional content as it is for nonprofessional
                 editors. During the past 20 years, authoring networked
                 content has been a part of the research agenda of the
                 multimedia community. Unfortunately, authoring has been
                 seen as an initial enterprise that occurs before `real'
                 content processing takes place. This limits the options
                 open to authors and to viewers of rich multimedia
                 content for creating and receiving focused, highly
                 personal media presentations. This article reflects on
                 the history of multimedia authoring. We focus on the
                 particular task of supporting socially-aware
                 multimedia, in which the relationships within
                 particular social groups among authors and viewers can
                 be exploited to create highly personal media
                 experiences. We provide an overview of the requirements
                 and characteristics of socially-aware multimedia
                 authoring within the context of exploiting community
                 content. We continue with a short historical
                 perspective on authoring support for these types of
                 situations. We then present an overview of a current
                 system for supporting socially-aware multimedia
                 authoring within the community content. We conclude
                 with a discussion of the issues that we feel can
                 provide a fruitful basis for future multimedia
                 authoring support. We argue that providing support for
                 socially-aware multimedia authoring can have a profound
                 impact on the nature and architecture of the entire
                 multimedia information processing pipeline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2013:IST,
  author =       "Lei Zhang and Yong Rui",
  title =        "Image search---from thousands to billions in 20 years",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "36:1--36:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490823",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article presents a comprehensive review and
                 analysis on image search in the past 20 years,
                 emphasizing the challenges and opportunities brought by
                 the astonishing increase of dataset scales from
                 thousands to billions in the same time period, which
                 was witnessed first-hand by the authors as active
                 participants in this research area. Starting with a
                 retrospective review of three stages of image search in
                 the history, the article highlights major breakthroughs
                 around the year 2000 in image search features, indexing
                 methods, and commercial systems, which marked the
                 transition from stage two to stage three. Subsequent
                 sections describe the image search research from four
                 important aspects: system framework, feature extraction
                 and image representation, indexing, and big data's
                 potential. Based on the review, the concluding section
                 discusses open research challenges and suggests future
                 research directions in effective visual representation,
                 image knowledge base construction, implicit user
                 feedback and crowdsourcing, mobile image search, and
                 creative multimedia interfaces.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Rowe:2013:LFY,
  author =       "Lawrence A. Rowe",
  title =        "Looking forward 10 years to multimedia successes",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "37:1--37:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490825",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A panel at ACM Multimedia 2012 addressed research
                 successes in the past 20 years. While the panel focused
                 on the past, this article discusses successes since the
                 ACM SIGMM 2003 Retreat and suggests research directions
                 in the next ten years. While significant progress has
                 been made, more research is required to allow
                 multimedia to impact our everyday computing
                 environment. The importance of hardware changes on
                 future research directions is discussed. We believe
                 ubiquitous computing---meaning abundant computation and
                 network bandwidth---should be applied in novel ways to
                 solve multimedia grand challenges and continue the IT
                 revolution of the past century.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Shenoy:2013:MSR,
  author =       "Prashant Shenoy",
  title =        "Multimedia systems research: {The} first twenty years
                 and lessons for the next twenty",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "38:1--38:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490859",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This retrospective article examines the past two
                 decades of multimedia systems research through the lens
                 of three research topics that were in vogue in the
                 early days of the field and offers perspectives on the
                 evolution of these research topics. We discuss the
                 eventual impact of each line of research and offer
                 lessons for future research in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hua:2013:OVD,
  author =       "Kien A. Hua",
  title =        "Online video delivery: {Past}, present, and future",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "39:1--39:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502435",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Video streaming is the core technology for online
                 video delivery systems. Initial research on this
                 technology faced many challenges. In this article,
                 lessons learned from beginning trials are discussed;
                 some pioneering works that provided early solutions and
                 inspired subsequent research are presented; and new
                 techniques required for emerging applications are
                 examined.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Swaminathan:2013:WMV,
  author =       "Viswanathan Swaminathan",
  title =        "Are we in the middle of a video streaming
                 revolution?",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "40:1--40:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490826",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "It has been roughly 20 years since the beginning of
                 video streaming over the Internet. Until very recently,
                 video streaming experiences left much to be desired.
                 Over the last few years, this has significantly
                 improved making monetization of streaming, possible.
                 Recently, there has been an explosion of commercial
                 video delivery services over the Internet, sometimes
                 referred to as over-the-top (OTT) delivery. All these
                 services invariably use streaming technologies.
                 Initially, streaming had all the promise, then for a
                 long time, it was download and play, later progressive
                 download for short content, and now it is streaming
                 again. Did streaming win the download versus streaming
                 contest? Did the best technology win? The improvement
                 in streaming experience has been possible through a
                 variety of new streaming technologies, some proprietary
                 and others extensions to standard protocols. The
                 primary delivery mechanism for entertainment video,
                 both premium content like movies and user generated
                 content (UGC), tends to be HTTP streaming. Is HTTP
                 streaming the panacea for all problems? The goal of
                 this article is to give an industry perspective of what
                 fundamentally changed in video streaming that makes it
                 commercially viable now. This article outlines how a
                 blend of technology choices between download and
                 streaming makes the current wave of ubiquitous
                 streaming possible for entertainment video delivery.
                 After identifying problems that still need to be
                 solved, the article concludes with the lessons learnt
                 from the video streaming evolution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chou:2013:AIC,
  author =       "Philip A. Chou",
  title =        "Advances in immersive communication: (1) {Telephone},
                 (2) {Television}, (3) {Teleportation}",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "41:1--41:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2492704",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The last great advances in immersive communication
                 were the invention of the telephone over 137 years ago
                 and the invention of the video telephone (n{\'e}
                 television) over 86 years ago. However, a perfect storm
                 is brewing for the next advance in immersive
                 communication, thanks to the convergence of massive
                 amounts of computation, bandwidth, resolution, new
                 sensors, and new displays. It could well be the
                 Multimedia community that turns this brew into the next
                 great advance in immersive communication, something
                 akin to teleportation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chang:2013:HFW,
  author =       "Shih-Fu Chang",
  title =        "How far we've come: {Impact} of 20 years of multimedia
                 information retrieval",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "42:1--42:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2491844",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article reviews the major research trends that
                 emerged in the last two decades within the broad area
                 of multimedia information retrieval, with a focus on
                 the ACM Multimedia community. Trends are defined
                 (nonscientifically) to be topics that appeared in ACM
                 multimedia publications and have had a significant
                 number of citations. The article also assesses the
                 impacts of these trends on real-world applications. The
                 views expressed are subjective and likely biased but
                 hopefully useful for understanding the heritage of the
                 community and stimulating new research direction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Effelsberg:2013:PLB,
  author =       "Wolfgang Effelsberg",
  title =        "A personal look back at twenty years of research in
                 multimedia content analysis",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "43:1--43:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2502434",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This paper is a personal look back at twenty years of
                 research in multimedia content analysis. It addresses
                 the areas of audio, photo and video analysis for the
                 purpose of indexing and retrieval from the perspective
                 of a multimedia researcher. Whereas a general analysis
                 of content is impossible due to the personal bias of
                 the user, significant progress was made in the
                 recognition of specific objects or events. The paper
                 concludes with a brief outlook on the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hanjalic:2013:MRM,
  author =       "Alan Hanjalic",
  title =        "Multimedia retrieval that matters",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490827",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article emphasizes the need to refocus multimedia
                 information retrieval (MIR) research towards bridging
                 the utility gap, the gap between the expected and
                 de facto usefulness of MIR solutions. This requires us
                 to revisit the notion of relevance, but also to
                 consider other criteria for assessing MIR solutions,
                 like the informativeness of the retrieved results and
                 how helpful they are for the users. The article also
                 states that this focus shift cannot be realized
                 incrementally, but by revisiting the foundations of MIR
                 solutions, that is, by a utility-by-design approach. In
                 this respect, a number of research challenges are
                 proposed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Turk:2013:TYE,
  author =       "Matthew Turk",
  title =        "Over twenty years of eigenfaces",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490824",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The inaugural ACM Multimedia Conference coincided with
                 a surge of interest in computer vision technologies for
                 detecting and recognizing people and their activities
                 in images and video. Face recognition was the first of
                 these topics to broadly engage the vision and
                 multimedia research communities. The Eigenfaces
                 approach was, deservedly or not, the method that
                 captured much of the initial attention, and it
                 continues to be taught and used as a benchmark over 20
                 years later. This article is a brief personal view of
                 the genesis of Eigenfaces for face recognition and its
                 relevance to the multimedia community.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Whitman:2013:CSF,
  author =       "Brian Whitman",
  title =        "Care and scale: {Fifteen} years of music retrieval",
  journal =      j-TOMCCAP,
  volume =       "9",
  number =       "1s",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2492703",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:45 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The co-founder of The Echo Nest, a music intelligence
                 company that now powers recommendation and discovery
                 for most music services, discusses the notion of care
                 and scale, cultural analysis of music, a brief history
                 of music retrieval, and how and why The Echo Nest got
                 started.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Szeliski:2013:NWC,
author = "Richard Szeliski and Noah Snavely and Steven M.
Seitz",
title = "Navigating the worldwide community of photos",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "47:1--47:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2492208",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The last decade has seen an explosion in the number of
photographs available on the Internet. The sheer volume
of interesting photos makes it a challenge to explore
this space. Various Web and social media sites, along
with search and indexing techniques, have been
developed in response. One natural way to navigate
these images is in a 3D geo-located context. In this
article, we reflect on our work in this area, with a
focus on techniques that build partial 3D scene models
to help find and navigate interesting photographs in an
interactive, immersive 3D setting. We also discuss how
finding such relationships among photographs opens up
exciting new possibilities for multimedia authoring,
visualization, and editing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "47",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Andre:2013:EUU,
author = "Elisabeth Andr{\'e}",
title = "Exploiting unconscious user signals in multimodal
human-computer interaction",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "48:1--48:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2502433",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents the idea of empathic stimulation
that relies on the power and potential of unconsciously
conveyed attentive and emotional information to
facilitate human-machine interaction. Starting from a
historical review of related work presented at past ACM
Multimedia conferences, we discuss challenges that
arise when exploiting unconscious human signals for
empathic stimulation, such as the real-time analysis of
psychological user states and the smooth adaptation of
the human-machine interface based on this analysis. A
classical application field that might benefit from the
idea of unconscious human-computer interaction is the
exploration of massive datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "48",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Sundaram:2013:EMS,
author = "Hari Sundaram",
title = "Experiential media systems",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "49:1--49:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2502432",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents a personalized narrative on the
early discussions within the Multimedia community and
the subsequent research on experiential media systems.
I discuss two different research initiatives---design of
real-time, immersive multimedia feedback environments
for stroke rehabilitation; exploratory environments for
events that exploited the user's ability to make
connections. I discuss the issue of foundations: the
question of multisensory integration and
superadditivity; the need for identification of
``first-class'' Multimedia problems; expanding the
scope of Multimedia research.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "49",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Kompatsiaris:2013:ISS,
author = "Ioannis (Yiannis) Kompatsiaris and Wenjun (Kevin) Zeng
and Gang Hua and Liangliang Cao",
title = "Introduction to the special section of best papers of
{ACM Multimedia} 2012",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "50:1--50:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2523001.2523004",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "50",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2013:RAM,
author = "Heng Liu and Tao Mei and Houqiang Li and Jiebo Luo and
Shipeng Li",
title = "Robust and accurate mobile visual localization and its
applications",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "51:1--51:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2491735",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Mobile applications are becoming increasingly popular.
More and more people are using their phones to enjoy
ubiquitous location-based services (LBS). The
increasing popularity of LBS creates a fundamental
problem: mobile localization. Besides traditional
localization methods that use GPS or wireless signals,
using phone-captured images for localization has drawn
significant interest from researchers. Photos contain
more scene context information than the embedded
sensors, leading to a more precise location
description. With the goal being to accurately sense
real geographic scene contexts, this article presents a
novel approach to mobile visual localization according
to a given image (typically associated with a rough GPS
position). The proposed approach is capable of
providing a complete set of more accurate parameters
about the scene geo-context including the real
locations of both the mobile user and perhaps more
importantly the captured scene, as well as the viewing
direction. To figure out how to make image localization
quick and accurate, we investigate various techniques
for large-scale image retrieval and 2D-to-3D matching.
Specifically, we first generate scene clusters using
joint geo-visual clustering, with each scene being
represented by a reconstructed 3D model from a set of
images. The 3D models are then indexed using a visual
vocabulary tree structure. Taking geo-tags of the
database image as prior knowledge, a novel
location-based codebook weighting scheme is proposed to
embed this additional information into the codebook.
The discriminative power of the codebook is enhanced,
thus leading to better image retrieval performance. The
query image is aligned with the models obtained from
the image retrieval results, and eventually registered
to a real-world map. We evaluate the effectiveness of
our approach using several large-scale datasets and
achieve estimation accuracy of a user's location
within 13 meters, viewing direction within 12 degrees,
and viewing distance within 26 meters. Of particular
note is our showcase of three novel applications based
on localization results: (1) an on-the-spot tour guide,
(2) collaborative routing, and (3) a sight-seeing
guide. The evaluations through user studies demonstrate
that these applications are effective in facilitating
the ideal rendezvous for mobile users.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "51",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2013:PBS,
author = "Zhi Wang and Wenwu Zhu and Xiangwen Chen and Lifeng
Sun and Jiangchuan Liu and Minghua Chen and Peng Cui
and Shiqiang Yang",
title = "Propagation-based social-aware multimedia content
distribution",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "52:1--52:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2523001.2523005",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Online social networks have reshaped how multimedia
contents are generated, distributed, and consumed on
today's Internet. Given the massive number of
user-generated contents shared in online social
networks, users are moving to directly access these
contents in their preferred social network services. It
is intriguing to study the service provision of social
contents for global users with satisfactory quality of
experience. In this article, we conduct large-scale
measurement of a real-world online social network
system to study the social content propagation. We have
observed important propagation patterns, including
social locality, geographical locality, and temporal
locality. Motivated by the measurement insights, we
propose a propagation-based social-aware delivery
framework using a hybrid edge-cloud and peer-assisted
architecture. We also design replication strategies for
the architecture based on three propagation predictors
designed by jointly considering user, content, and
context information. In particular, we design a
propagation region predictor and a global audience
predictor to guide how the edge-cloud servers back up
the contents, and a local audience predictor to guide
how peers cache the contents for their friends. Our
trace-driven experiments further demonstrate the
effectiveness and superiority of our design.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "52",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Sang:2013:SIA,
author = "Jitao Sang and Changsheng Xu",
title = "Social influence analysis and application on
multimedia sharing websites",
journal = j-TOMCCAP,
volume = "9",
number = "1s",
pages = "53:1--53:??",
month = oct,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2502436",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:45 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Social media is becoming popular these days, where
users necessarily interact with each other to form
social networks. Influence network, as one special case
of social network, has been recognized as significantly
impacting social activities and user decisions. We
emphasize in this article that the inter-user influence
is essentially topic-sensitive, as for different tasks
users tend to trust different influencers and be
influenced most by them. While existing research
focuses on global influence modeling and applies to
text-based networks, this work investigates the problem
of topic-sensitive influence modeling in the multimedia
domain. According to temporal data justification, we
propose a multimodal probabilistic model, considering
both users' textual annotation and uploaded visual
images. This model is capable of simultaneously
extracting user topic distributions and topic-sensitive
influence strengths. By identifying the topic-sensitive
influencer, we are able to conduct applications, like
collective search and collaborative recommendation. A
risk minimization-based general framework for
personalized image search is further presented, where
the image search task is transferred to measure the
distance of image and personalized query language
models. The framework considers the noisy tag issue and
enables easy incorporation of social influence. We have
conducted experiments on a large-scale Flickr dataset.
Qualitative as well as quantitative evaluation results
have validated the effectiveness of the topic-sensitive
influencer mining model, and demonstrated the advantage
of incorporating topic-sensitive influence in
personalized image search and topic-based image
recommendation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "53",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Silva:2013:HPH,
author = "Juan M. Silva and Mauricio Orozco and Jongeun Cha and
Abdulmotaleb {El Saddik} and Emil M. Petriu",
title = "Human perception of haptic-to-video and
haptic-to-audio skew in multimedia applications",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "9:1--9:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457451",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The purpose of this research is to assess the
sensitivity of humans to perceive asynchrony among
media signals coming from a computer application.
Particularly we examine haptic-to-video and
haptic-to-audio skew. For this purpose we have designed
an experimental setup, where users are exposed to a
basic multimedia presentation resembling a ping-pong
game. For every collision between a ball and a racket,
the user is able to perceive auditory, visual, and
haptic cues about the collision event. We artificially
introduce negative and positive delay to the auditory
and visual cues with respect to the haptic stream. We
subjectively evaluate the perception of inter-stream
asynchrony perceived by the users using two types of
haptic devices. The statistical results of our
evaluation show perception rates of around 100 ms
regardless of modality and type of device.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "9",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bhatt:2013:RPB,
author = "Chidansh A. Bhatt and Pradeep K. Atrey and Mohan S.
Kankanhalli",
title = "A reward-and-punishment-based approach for concept
detection using adaptive ontology rules",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "10:1--10:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457452",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Despite the fact that performance improvements have
been reported in the last years, semantic concept
detection in video remains a challenging problem.
Existing concept detection techniques, with ontology
rules, exploit the static correlations among primitive
concepts but not the dynamic spatiotemporal
correlations. The proposed method rewards (or punishes)
detected primitive concepts using dynamic
spatiotemporal correlations of the given ontology rules
and updates these ontology rules based on the accuracy
of detection. Adaptively learned ontology rules
significantly help in improving the overall accuracy of
concept detection as shown in the experimental
result.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "10",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Alsulaiman:2013:IVB,
author = "Fawaz A. Alsulaiman and Nizar Sakr and Julio J.
Vald{\'e}s and Abdulmotaleb {El Saddik}",
title = "Identity verification based on handwritten signatures
with haptic information using genetic programming",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "11:1--11:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457453",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, haptic-based handwritten signature
verification using Genetic Programming (GP)
classification is presented. A comparison of GP-based
classification with classical classifiers including
support vector machine, $k$-nearest neighbors,
na{\"\i}ve Bayes, and random forest is conducted. In
addition, the use of GP in discovering small
knowledge-preserving subsets of features in
high-dimensional datasets of haptic-based signatures is
investigated and several approaches are explored.
Subsets of features extracted from GP-generated models
(analytic functions) are also exploited to determine
the importance and relevance of different haptic data
types (e.g., force, position, torque, and orientation)
in user identity verification. The results revealed
that GP classifiers compare favorably with the
classical methods and use a much fewer number of
attributes (with simple function sets).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "11",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2013:MAS,
author = "Qianni Zhang and Ebroul Izquierdo",
title = "Multifeature analysis and semantic context learning
for image classification",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "12:1--12:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457454",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article introduces an image classification
approach in which the semantic context of images and
multiple low-level visual features are jointly
exploited. The context consists of a set of semantic
terms defining the classes to be associated to
unclassified images. Initially, a multiobjective
optimization technique is used to define a multifeature
fusion model for each semantic class. Then, a Bayesian
learning procedure is applied to derive a context model
representing relationships among semantic classes.
Finally, this context model is used to infer object
classes within images. Selected results from a
comprehensive experimental evaluation are reported to
show the effectiveness of the proposed approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "12",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhao:2013:MEU,
author = "Zhen Wei Zhao and Sameer Samarth and Wei Tsang Ooi",
title = "Modeling the effect of user interactions on mesh-based
{P2P VoD} streaming systems",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "13:1--13:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457455",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "User interactions such as seeks and pauses are widely
supported by existing Peer-to-Peer Video-on-Demand (P2P
VoD) streaming systems. Their effect on the streaming
system, however, has not been well studied. Seeks cause
peers to skip part of the video, making them stay in
the system for shorter time, and thus contribute less.
On the other hand, only part of the video is downloaded
due to seeks, reducing peers' demand from the system.
It is unclear which factor dominates the effect of
seeks on the streaming system. Pauses during playback,
on one hand, allow peers to stay longer in the system
and upload more content. When interleaved with seeks,
however, long pauses may increase peers' demand
unnecessarily as peers may download content that will
eventually be skipped by subsequent forward seeks. The
collective effect of seeks and pauses, together with
the known random peer departure, is unintuitive and
needs to be addressed properly so as to understand the
effect of human factors on the streaming system
performance. In this article, we develop an analytical
model to both qualitatively and quantitatively study
the effect of seeks and pauses on mesh-based P2P VoD
streaming systems, in particular, the effect on the
server cost. Our model can help in understanding how
human factors such as seeks and pauses affect the
streaming system performance, tuning a P2P VoD system
towards better system performance and stability, and
providing a framework for capacity planning.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "13",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2013:ETT,
author = "Yang Yang and Yi Yang and Heng Tao Shen",
title = "Effective transfer tagging from image to video",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "14:1--14:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457456",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Recent years have witnessed a great explosion of
user-generated videos on the Web. In order to achieve
an effective and efficient video search, it is critical
for modern video search engines to associate videos
with semantic keywords automatically. Most of the
existing video tagging methods can hardly achieve
reliable performance due to deficiency of training
data. It is noticed that abundant well-tagged data are
available in other relevant types of media (e.g.,
images). In this article, we propose a novel video
tagging framework, termed as Cross-Media Tag Transfer
(CMTT), which utilizes the abundance of well-tagged
images to facilitate video tagging. Specifically, we
build a ``cross-media tunnel'' to transfer knowledge
from images to videos. To this end, an optimal kernel
space, in which distribution distance between images
and video is minimized, is found to tackle the
domain-shift problem. A novel cross-media video tagging
model is proposed to infer tags by exploring the
intrinsic local structures of both labeled and
unlabeled data, and learn reliable video classifiers.
An efficient algorithm is designed to optimize the
proposed model in an iterative and alternative way.
Extensive experiments illustrate the superiority of our
proposal compared to the state-of-the-art algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "14",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhao:2013:AAP,
author = "Zhen Wei Zhao and Wei Tsang Ooi",
title = "{APRICOD}: an access-pattern-driven distributed
caching middleware for fast content discovery of
noncontinuous media access",
journal = j-TOMCCAP,
volume = "9",
number = "2",
pages = "15:1--15:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2457450.2457457",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:48 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Content discovery is a major source of latency in
peer-to-peer (P2P) media streaming systems, especially
in the presence of noncontinuous user access, such as
random seek in Video-on-Demand (VoD) streaming and
teleportation in a Networked Virtual Environment (NVE).
After the aforementioned user interactions, streaming
systems often need to initiate the content discovery
process to identify where to retrieve the requested
media objects. Short content lookup latency is demanded
to ensure smooth user experience. Existing content
discovery systems based on either a Distributed Hash
Table (DHT) or gossip mechanism cannot cope with
noncontinuous access efficiently due to their long
lookup latency. In this work, we propose an
access-pattern-driven distributed caching middleware
named APRICOD, which caters for fast and scalable
content discovery in peer-to-peer media streaming
systems, especially when user interactions are present.
APRICOD exploits correlations among media objects
accessed by users, and adapts to shift in the user
access pattern automatically. We first present a
general APRICOD design that can be used with any
existing content discovery system. We then present an
implementation of APRICOD on top of Pastry, which we
use to evaluate APRICOD. Our evaluation in a 1024-node
system, using a Second Life trace with 5,735 users and
a VoD trace with 54 users, shows that APRICOD can
effectively resolve all continuous access queries with
a single hop deterministically with node failure as an
exception, and resolve noncontinuous access queries
with a single hop with high probability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "15",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Anonymous:2013:CPM,
author = "Anonymous",
title = "Call for papers: {Multiple} sensorial {(MulSeMedia)}
multi-modal media: {Advances} and applications",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "15:1--15:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2500818",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "15",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Mei:2013:NLS,
author = "Tao Mei and Lin-Xie Tang and Jinhui Tang and
Xian-Sheng Hua",
title = "Near-lossless semantic video summarization and its
applications to video analysis",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "16:1--16:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487269",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The ever increasing volume of video content on the Web
has created profound challenges for developing
efficient indexing and search techniques to manage
video data. Conventional techniques such as video
compression and summarization strive for the two
commonly conflicting goals of low storage and high
visual and semantic fidelity. With the goal of
balancing both video compression and summarization,
this article presents a novel approach, called
Near-Lossless Semantic Summarization (NLSS), to
summarize a video stream with the least high-level
semantic information loss by using an extremely small
piece of metadata. The summary consists of compressed
image and audio streams, as well as the metadata for
temporal structure and motion information. Although at
a very low compression rate (around $ 1 / 40 $ of
H.264 baseline, where traditional compression
techniques can hardly preserve an acceptable visual
fidelity), the proposed NLSS still can be applied to
many video-oriented tasks, such as visualization,
indexing and browsing, duplicate detection, concept
detection, and so on. We evaluate the NLSS on TRECVID
and other video collections, and demonstrate that it is
a powerful tool for significantly reducing storage
consumption, while keeping high-level semantic
fidelity.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "16",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ademoye:2013:IRT,
author = "Oluwakemi A. Ademoye and Gheorghita Ghinea",
title = "Information recall task impact in olfaction-enhanced
multimedia",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "17:1--17:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487270",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Enhancing multimedia applications with olfactory
sensations is one of the last challenges in the area.
While there is evidence, both scientific and anecdotal,
that olfactory cues help users in information recall
tasks, there is a lack of work when the targeted
information is one contained in a multimedia
presentation, which is precisely the focus of this
article. Accordingly, we present the results of two
experimental studies. The first study measured the
impact of olfactory media variation on the user's
ability to perceive, synthesize, and analyze the
informational content of olfactory-enhanced multimedia
videos; the second study measured the impact of
information content, and an information recall task in
respect of user perception of the relevance, sense of
reality, and acceptability of the olfactory media
content, as well as the overall enjoyment of the
experience. Results show that the use of olfactory
media content, both pleasant and unpleasant, in
multimedia displays does not significantly impact on
information assimilation in a negative way. Moreover,
the addition of a performance task may enhance the
user's understanding of the correlation between the
characteristic odor(s) and the scenario under
consideration, as well as enable users to consciously
learn the odors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "17",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yeh:2013:CAS,
author = "Lo-Yao Yeh and Jiun-Long Huang",
title = "A conditional access system with efficient key
distribution and revocation for mobile pay-{TV}
systems",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "18:1--18:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487271",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Current mobile pay-TV systems have two types of
Conditional Access Systems (CAS): group-key-based and
public-key systems. The best feature of group-key-based
systems is the ability to enjoy the broadcast nature in
delivery multimedia contents, while the major advantage
of public-key systems is consolidating the security
foundation to withstand various attacks, such as
collusion attacks. However, the problems of
group-key-based systems include collusion attacks, lack
of nonrepudiation, and troublesome key distribution.
Even worse, the benefit of broadcast efficiency is
confined to a group size of no more than 512
subscribers. For public-key systems, the poor delivery
scalability is the major shortcoming because the unique
private key feature is only suitable for one-to-one
delivery. In this article, we introduce a scalable
access control scheme to integrate the merits of
broadcasting regardless of group size and sound
security assurance, including fine-grained access
control and collusion attack resistance. For subscriber
revocation, a single message is broadcast to the other
subscribers to get the updated key, thus significantly
boosting subscriber revocation scalability. Due to
mobile subscribers' dynamic movements, this article
also analyzes the benefit of retransmission cases in
our system. Through the performance evaluation and
functionality comparison, the proposed scheme should be
a decent candidate to enhance the security strength and
transmission efficiency in a mobile pay-TV system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "18",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Naskar:2013:GTL,
author = "Ruchira Naskar and Rajat Subhra Chakraborty",
title = "A generalized tamper localization approach for
reversible watermarking algorithms",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "19:1--19:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487272",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In general reversible watermarking algorithms, the
convention is to reject the entire cover image at the
receiver end if it fails authentication, since there is
no way to detect the exact locations of tampering. This
feature may be exploited by an adversary to bring about
a form of DoS attack. Here we provide a solution to
this problem in form of a tamper localization mechanism
for reversible watermarking algorithms, which allows
selective rejection of distorted cover image regions in
case of authentication failure, thus avoiding rejection
of the complete image. Additionally it minimizes the
bandwidth requirement of the communication channel.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "19",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Doherty:2013:SSA,
author = "Jonathan Doherty and Kevin Curran and Paul McKevitt",
title = "A self-similarity approach to repairing large dropouts
of streamed music",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "20:1--20:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487273",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Enjoyment of audio has now become about flexibility
and personal freedom. Digital audio content can be
acquired from many sources and wireless networking
allows digital media devices and associated peripherals
to be unencumbered by wires. However, despite recent
improvements in capacity and quality of service,
wireless networks are inherently unreliable
communications channels for the streaming of audio,
being susceptible to the effects of range,
interference, and occlusion. This time-varying
reliability of wireless audio transfer introduces data
corruption and loss, with unpleasant audible effects
that can be profound and prolonged in duration.
Traditional communications techniques for error
mitigation perform poorly and in a bandwidth
inefficient manner in the presence of such large-scale
defects in a digital audio stream. A novel solution
that can complement existing techniques takes account
of the semantics and natural repetition of music.
Through the use of self-similarity metadata, missing or
damaged audio segments can be seamlessly replaced with
similar undamaged segments that have already been
successfully received. We propose a technology to
generate relevant self-similarity metadata for
arbitrary audio material and to utilize this metadata
within a wireless audio receiver to provide
sophisticated and real-time correction of large-scale
errors. The primary objectives are to match the current
section of a song being received with previous sections
while identifying incomplete sections and determining
replacements based on previously received portions of
the song. This article outlines our approach to Forward
Error Correction (FEC) technology that is used to
``repair'' a bursty dropout when listening to
time-dependent media on a wireless network. Using
self-similarity analysis on a music file, we can
``automatically'' repair the dropout with a similar
portion of the music already received thereby
minimizing a listener's discomfort.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "20",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ho:2013:IPC,
author = "Edmond S. L. Ho and Jacky C. P. Chan and Taku Komura
and Howard Leung",
title = "Interactive partner control in close interactions for
real-time applications",
journal = j-TOMCCAP,
volume = "9",
number = "3",
pages = "21:1--21:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2487268.2487274",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:50 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents a new framework for synthesizing
motion of a virtual character in response to the
actions performed by a user-controlled character in
real time. In particular, the proposed method can
handle scenes in which the characters are closely
interacting with each other such as those in partner
dancing and fighting. In such interactions,
coordinating the virtual characters with the human
player automatically is extremely difficult because the
system has to predict the intention of the player
character. In addition, the style variations from
different users affect the accuracy in recognizing the
movements of the player character when determining the
responses of the virtual character. To solve these
problems, our framework makes use of the spatial
relationship-based representation of the body parts
called interaction mesh, which has been proven
effective for motion adaptation. The method is
computationally efficient, enabling real-time character
control for interactive applications. We demonstrate
its effectiveness and versatility in synthesizing a
wide variety of motions with close interactions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "21",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2013:ER,
author = "Ralf Steinmetz",
title = "{Editorial}: {Reviewers}",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "22:1--22:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501644",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "22",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Sakai:2013:PPC,
author = "Kazuya Sakai and Wei-Shinn Ku and Min-Te Sun and Roger
Zimmermann",
title = "Privacy preserving continuous multimedia streaming in
{MANETs}",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "23:1--23:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501645",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "At present, mobile devices are prevalent with end
users and continuous media streaming services in mobile
ad-hoc networks (MANETs) support popular applications.
It is required for applications that stream isochronous
media that the network link be continuously available.
In this study, we introduce two group-server scheduling
schemes to improve link continuity: static group-server
scheduling and dynamic group-server scheduling. With
our solution, if one of the current links between a
client and a server instance breaks, the client can
still download the multimedia content from another
scheduled server peer. In addition, we incorporate the
data link layer constraints as well as privacy concerns
into our protocol design. The simulation results show
that the proposed schemes significantly improve the
effective link duration, overall system performance,
and degree of privacy in MANETs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "23",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Dong:2013:RIA,
author = "Jian Dong and Bin Cheng and Xiangyu Chen and Tat-Seng
Chua and Shuicheng Yan and Xi Zhou",
title = "Robust image annotation via simultaneous feature and
sample outlier pursuit",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "24:1--24:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501646",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Graph-based semi-supervised image annotation has
achieved great success in a variety of studies, yet it
essentially and intuitively suffers from both the
irrelevant/noisy features (referred to as feature
outliers) and the unusual/corrupted samples (referred
to as sample outliers). In this work, we investigate
how to derive robust sample affinity matrix via
simultaneous feature and sample outlier pursuit. This
task is formulated as a Dual-outlier and Prior-driven
Low-Rank Representation (DP-LRR) problem, which
possesses convexity in objective function. In DP-LRR,
the clean data are assumed to be self-reconstructible
with low-rank coefficient matrix as in LRR; while the
error matrix is decomposed as the sum of a row-wise
sparse matrix and a column-wise sparse matrix, the
$\ell_{2,1}$-norm minimization of which encourages the
pursuit of feature and sample outliers respectively.
The DP-LRR is further regularized by the priors from
side information, that is, the inhomogeneous data
pairs. An efficient iterative procedure based on
linearized alternating direction method is presented to
solve the DP-LRR problem, with closed-form solutions
within each iteration. The derived low-rank
reconstruction coefficient matrix is then fed into any
graph based semi-supervised label propagation algorithm
for image annotation, and as a by-product, the cleaned
data from DP-LRR can also be utilized as a better image
representation to generally boost image annotation
performance. Extensive experiments on MIRFlickr,
Corel30K, NUS-WIDE-LITE and NUS-WIDE databases well
demonstrate the effectiveness of the proposed
formulation for robust image annotation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Villanueva:2013:HMB,
author = "Arantxa Villanueva and Victoria Ponz and Laura
Sesma-Sanchez and Mikel Ariz and Sonia Porta and Rafael
Cabeza",
title = "Hybrid method based on topography for robust detection
of iris center and eye corners",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "25:1--25:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501647",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "A multistage procedure to detect eye features is
presented. Multiresolution and topographic
classification are used to detect the iris center. The
eye corner is calculated combining valley detection and
eyelid curve extraction. The algorithm is tested in the
BioID database and in a proprietary database containing
more than 1200 images. The results show that the
suggested algorithm is robust and accurate. Regarding
the iris center our method obtains the best average
behavior for the BioID database compared to other
available algorithms. Additional contributions are that
our algorithm functions in real time and does not
require complex post processing stages.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "25",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2013:ECR,
author = "Bo Wang and Jinqiao Wang and Hanqing Lu",
title = "Exploiting content relevance and social relevance for
personalized ad recommendation on {Internet TV}",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "26:1--26:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501648",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "There have been not many interactions between the two
dominant forms of mass communication: television and
the Internet, while nowadays the appearance of Internet
television makes them more closely. Different with
traditional TV in a passive mode of transmission,
Internet TV makes it more possible to make personalized
service recommendation because of the interactivity
between users and the Internet. In this article, we
introduce a scheme to provide targeted ad
recommendation to Internet TV users by exploiting the
content relevance and social relevance. First, we
annotate TV videos in terms of visual content analysis
and textual analysis by aligning visual and textual
information. Second, with user-user, video-video and
user-video relationships, we employ Multi-Relationship
based Probabilistic Matrix Factorization (MRPMF) to
learn representative tags for modeling user preference.
And then semantic content relevance (between product/ad
and TV video) and social relevance (between product/ad
and user interest) are calculated by projecting the
corresponding tags into our advertising concept space.
Finally, with relevancy scores we make ranking for
relevant product/ads to effectively provide users
personalized recommendation. The experimental results
demonstrate attractiveness and effectiveness of our
proposed approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "26",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Alam:2013:MHB,
author = "Kazi Masudul Alam and Abu Saleh Md Mahfujur Rahman and
Abdulmotaleb {El Saddik}",
title = "Mobile haptic e-book system to support {$3$D}
immersive reading in ubiquitous environments",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "27:1--27:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501649",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In order to leverage the use of various modalities
such as audio-visual materials in instilling effective
learning behavior we present an intuitive approach of
annotation based hapto-audio-visual interaction with
the traditional digital learning materials such as
e-books. By integrating the home entertainment system
in the user's reading experience combined with haptic
interfaces we want to examine whether such augmentation
of modalities influence the user's learning patterns.
The proposed Haptic E-Book (HE-Book) system leverages
the haptic jacket, haptic arm band as well as haptic
sofa interfaces to receive haptic emotive signals
wirelessly in the form of patterned vibrations of the
actuators and expresses the learning material by
incorporating image, video, 3D environment based
augmented display in order to pave ways for intimate
reading experience in the popular mobile e-book
platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "27",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Nguyen:2013:TDA,
author = "Tam V. Nguyen and Si Liu and Bingbing Ni and Jun Tan
and Yong Rui and Shuicheng Yan",
title = "Towards decrypting attractiveness via multi-modality
cues",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "28:1--28:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501650",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Decrypting the secret of beauty or attractiveness has
been the pursuit of artists and philosophers for
centuries. To date, the computational model for
attractiveness estimation has been actively explored in
computer vision and multimedia community, yet with the
focus mainly on facial features. In this article, we
conduct a comprehensive study on female attractiveness
conveyed by single/multiple modalities of cues, that
is, face, dressing and/or voice, and aim to discover
how different modalities individually and collectively
affect the human sense of beauty. To extensively
investigate the problem, we collect the Multi-Modality
Beauty (M$^2$B) dataset, which is annotated with
attractiveness levels converted from manual $k$-wise
ratings and semantic attributes of different
modalities. Inspired by the common consensus that
middle-level attribute prediction can assist
higher-level computer vision tasks, we manually labeled
many attributes for each modality. Next, a tri-layer
Dual-supervised Feature-Attribute-Task (DFAT) network
is proposed to jointly learn the attribute model and
attractiveness model of single/multiple modalities. To
remedy possible loss of information caused by
incomplete manual attributes, we also propose a novel
Latent Dual-supervised Feature-Attribute-Task (LDFAT)
network, where latent attributes are combined with
manual attributes to contribute to the final
attractiveness estimation. The extensive experimental
evaluations on the collected M$^2$B dataset well
demonstrate the effectiveness of the proposed DFAT and
LDFAT networks for female attractiveness prediction.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "28",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Tang:2013:TOH,
author = "Jinhui Tang and Qiang Chen and Meng Wang and Shuicheng
Yan and Tat-Seng Chua and Ramesh Jain",
title = "Towards optimizing human labeling for interactive
image tagging",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "29:1--29:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501651",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Interactive tagging is an approach that combines human
and computer to assign descriptive keywords to image
contents in a semi-automatic way. It can avoid the
problems in automatic tagging and pure manual tagging
by achieving a compromise between tagging performance
and manual cost. However, conventional research efforts
on interactive tagging mainly focus on sample selection
and models for tag prediction. In this work, we
investigate interactive tagging from a different
aspect. We introduce an interactive image tagging
framework that can more fully make use of human's
labeling efforts. That means, it can achieve a
specified tagging performance by taking less manual
labeling effort or achieve better tagging performance
with a specified labeling cost. In the framework,
hashing is used to enable a quick clustering of image
regions and a dynamic multiscale clustering labeling
strategy is proposed such that users can label a large
group of similar regions each time. We also employ a
tag refinement method such that several inappropriate
tags can be automatically corrected. Experiments on a
large dataset demonstrate the effectiveness of our
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "29",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Carbunar:2013:FNA,
author = "Bogdan Carbunar and Rahul Potharaju and Michael Pearce
and Venugopal Vasudevan and Michael Needham",
title = "A framework for network aware caching for video on
demand systems",
journal = j-TOMCCAP,
volume = "9",
number = "4",
pages = "30:1--30:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501643.2501652",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
note = "See errata \cite{Carbunar:2014:EFN}.",
abstract = "Video on Demand (VoD) services allow users to select
and locally consume remotely stored content. We
investigate the use of caching to solve the scalability
issues of several existing VoD providers. We propose
metrics and goals that define the requirements of a
caching framework for CDNs of VoD systems. Using data
logs collected from Motorola equipment from Comcast VoD
deployments we show that several classic caching
solutions do not satisfy the proposed goals. We address
this issue by developing novel techniques for
predicting future values of several metrics of
interest. We rely on computed predictions to define the
penalty imposed on the system, both network and caching
sites, when not storing individual items. We use item
penalties to devise novel caching and static content
placement strategies. We use the previously mentioned
data logs to validate our solutions and show that they
satisfy all the defined goals.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "30",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2013:ENO,
author = "Zechao Li and Jing Liu and Meng Wang and Changsheng Xu
and Hanqing Lu",
title = "Enhancing news organization for convenient retrieval
and browsing",
journal = j-TOMCCAP,
volume = "10",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2488732",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "To facilitate users to access news quickly and
comprehensively, we design a news search and browsing
system named GeoVisNews, in which the news elements of
``Where'', ``Who'', ``What'' and ``When'' are enhanced
via news geo-localization, image enrichment and joint
ranking, respectively. For news geo-localization, an
Ordinal Correlation Consistent Matrix Factorization
(OCCMF) model is proposed to maintain the relevance
rankings of locations to a specific news document and
simultaneously capture intra-relations among locations
and documents. To visualize news, we develop a novel
method to enrich news documents with appropriate web
images. Specifically, multiple queries are first
generated from news documents for image search, and
then the appropriate images are selected from the
collected web images by an intelligent fusion approach
based on multiple features. Obtaining the geo-localized
and image enriched news resources, we further employ a
joint ranking strategy to provide relevant, timely and
popular news items as the answer of user searching
queries. Extensive experiments on a large-scale news
dataset collected from the web demonstrate the superior
performance of the proposed approaches over related
methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Knees:2013:SMS,
author = "Peter Knees and Markus Schedl",
title = "A survey of music similarity and recommendation from
music context data",
journal = j-TOMCCAP,
volume = "10",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2542205.2542206",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this survey article, we give an overview of methods
for music similarity estimation and music
recommendation based on music context data. Unlike
approaches that rely on music content and have been
researched for almost two decades, music-context-based
(or contextual) approaches to music retrieval are a
quite recent field of research within music information
retrieval (MIR). Contextual data refers to all
music-relevant information that is not included in the
audio signal itself. In this article, we focus on
contextual aspects of music primarily accessible
through web technology. We discuss different sources of
context-based data for individual music pieces and for
music artists. We summarize various approaches for
constructing similarity measures based on the
collaborative or cultural knowledge incorporated into
these data sources. In particular, we identify and
review three main types of context-based similarity
approaches: text-retrieval-based approaches (relying on
web-texts, tags, or lyrics), co-occurrence-based
approaches (relying on playlists, page counts,
microblogs, or peer-to-peer-networks), and approaches
based on user ratings or listening habits. This article
elaborates the characteristics of the presented
context-based measures and discusses their strengths as
well as their weaknesses.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhao:2013:DPO,
author = "Yi-Liang Zhao and Qiang Chen and Shuicheng Yan and
Tat-Seng Chua and Daqing Zhang",
title = "Detecting profilable and overlapping communities with
user-generated multimedia contents in {LBSNs}",
journal = j-TOMCCAP,
volume = "10",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2502415",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Mar 13 07:37:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In location-based social networks (LBSNs), users
implicitly interact with each other by visiting places,
issuing comments and/or uploading photos. These
heterogeneous interactions convey the latent
information for identifying meaningful user groups,
namely social communities, which exhibit unique
location-oriented characteristics. In this work, we aim
to detect and profile social communities in LBSNs by
representing the heterogeneous interactions with a
multimodality nonuniform hypergraph. Here, the vertices
of the hypergraph are users, venues, textual comments
or photos and the hyperedges characterize the
$k$-partite heterogeneous interactions such as posting
certain comments or uploading certain photos while
visiting certain places. We then view each detected
social community as a dense subgraph within the
heterogeneous hypergraph, where the user community is
constructed by the vertices and edges in the dense
subgraph and the profile of the community is
characterized by the vertices related with venues,
comments and photos and their inter-relations. We
present an efficient algorithm to detect the overlapped
dense subgraphs, where the profile of each social
community is guaranteed to be available by constraining
the minimal number of vertices in each modality.
Extensive experiments on Foursquare data well validated
the effectiveness of the proposed framework in terms of
detecting meaningful social communities and uncovering
their underlying profiles in LBSNs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "3",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bhatnagar:2013:SRI,
  author =       "Gaurav Bhatnagar and Q. M. Jonathan Wu and Pradeep K.
                 Atrey",
  title =        "Secure randomized image watermarking based on singular
                 value decomposition",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2542205.2542207",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this article, a novel logo watermarking scheme is
                 proposed based on wavelet frame transform, singular
                 value decomposition and automatic thresholding. The
                 proposed scheme essentially rectifies the ambiguity
                 problem in the SVD-based watermarking. The core idea is
                 to randomly upscale the size of host image using
                 reversible random extension transform followed by the
                 embedding of logo watermark in the wavelet frame
                 domain. After embedding, a verification phase is casted
                 with the help of a binary watermark and toral
                 automorphism. At the extraction end, the binary
                 watermark is first extracted followed by the
                 verification of watermarked image. The logo watermark
                 is extracted if and only if the watermarked image is
                 verified. The security, attack and comparative analysis
                 confirm high security, efficiency and robustness of the
                 proposed watermarking system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Mou:2013:CBC,
  author =       "Luntian Mou and Tiejun Huang and Yonghong Tian and
                 Menglin Jiang and Wen Gao",
  title =        "Content-based copy detection through multimodal
                 feature representation and temporal pyramid matching",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2542205.2542208",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Content-based copy detection (CBCD) is drawing
                 increasing attention as an alternative technology to
                 watermarking for video identification and copyright
                 protection. In this article, we present a comprehensive
                 method to detect copies that are subjected to
                 complicated transformations. A multimodal feature
                 representation scheme is designed to exploit the
                 complementarity of audio features, global and local
                 visual features so that optimal overall robustness to a
                 wide range of complicated modifications can be
                 achieved. Meanwhile, a temporal pyramid matching
                 algorithm is proposed to assemble frame-level
                 similarity search results into sequence-level matching
                 results through similarity evaluation over multiple
                 temporal granularities. Additionally, inverted indexing
                 and locality sensitive hashing (LSH) are also adopted
                 to speed up similarity search. Experimental results
                 over benchmarking datasets of TRECVID 2010 and 2009
                 demonstrate that the proposed method outperforms other
                 methods for most transformations in terms of copy
                 detection accuracy. The evaluation results also suggest
                 that our method can achieve competitive copy
                 localization preciseness.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2013:LSM,
  author =       "Xiangyu Chen and Yadong Mu and Hairong Liu and
                 Shuicheng Yan and Yong Rui and Tat-Seng Chua",
  title =        "Large-scale multilabel propagation based on efficient
                 sparse graph construction",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2542205.2542209",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "With the popularity of photo-sharing websites, the
                 number of web images has exploded into unseen
                 magnitude. Annotating such large-scale data will cost
                 huge amount of human resources and is thus
                 unaffordable. Motivated by this challenging problem, we
                 propose a novel sparse graph based multilabel
                 propagation (SGMP) scheme for super large scale
                 datasets. Both the efficacy and accuracy of the image
                 annotation are further investigated under different
                 graph construction strategies, where Gaussian noise and
                 non-Gaussian sparse noise are simultaneously considered
                 in the formulations of these strategies. Our proposed
                 approach outperforms the state-of-the-art algorithms by
                 focusing on: (1) For large-scale graph construction, a
                 simple yet efficient LSH (Locality Sensitive
                 Hashing)-based sparse graph construction scheme is
                 proposed to speed up the construction. We perform the
                 multilabel propagation on this hashing-based graph
                 construction, which is derived with LSH approach
                 followed by sparse graph construction within the
                 individual hashing buckets; (2) To further improve the
                 accuracy, we propose a novel sparsity induced scalable
                 graph construction scheme, which is based on a general
                 sparse optimization framework. Sparsity essentially
                 implies a very strong prior: for large scale
                 optimization, the values of most variables shall be
                 zeros when the solution reaches the optimum. By
                 utilizing this prior, the solutions of large-scale
                 sparse optimization problems can be derived by solving
                 a series of much smaller scale subproblems; (3) For
                 multilabel propagation, different from the traditional
                 algorithms that propagate over individual label
                 independently, our proposed propagation first encodes
                 the label information of an image as a unit label
                 confidence vector and naturally imposes inter-label
                 constraints and manipulates labels interactively. Then,
                 the entire propagation problem is formulated on the
                 concept of Kullback--Leibler divergence defined on
                 probabilistic distributions, which guides the
                 propagation of the supervision information. Extensive
                 experiments on the benchmark dataset NUS-WIDE with 270k
                 images and its lite version NUS-WIDE-LITE with 56k
                 images well demonstrate the effectiveness and
                 scalability of the proposed multi-label propagation
                 scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Houle:2013:API,
  author =       "Michael E. Houle and Vincent Oria and Shin'ichi Satoh
                 and Jichao Sun",
  title =        "Annotation propagation in image databases using
                 similarity graphs",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2487736",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The practicality of large-scale image indexing and
                 querying methods depends crucially upon the
                 availability of semantic information. The manual
                 tagging of images with semantic information is in
                 general very labor intensive, and existing methods for
                 automated image annotation may not always yield
                 accurate results. The aim of this paper is to reduce to
                 a minimum the amount of human intervention required in
                 the semantic annotation of images, while preserving a
                 high degree of accuracy. Ideally, only one copy of each
                 object of interest would be labeled manually, and the
                 labels would then be propagated automatically to all
                 other occurrences of the objects in the database. To
                 this end, we propose an influence propagation strategy,
                 SW-KProp, that requires no human intervention beyond
                 the initial labeling of a subset of the images.
                 SW-KProp distributes semantic information within a
                 similarity graph defined on all images in the database:
                 each image iteratively transmits its current label
                 information to its neighbors, and then readjusts its
                 own label according to the combined influences of its
                 neighbors. SW-KProp influence propagation can be
                 efficiently performed by means of matrix computations,
                 provided that pairwise similarities of images are
                 available. We also propose a variant of SW-KProp which
                 enhances the quality of the similarity graph by
                 selecting a reduced feature set for each prelabeled
                 image and rebuilding its neighborhood. The performances
                 of the SW-KProp method and its variant were evaluated
                 against several competing methods on classification
                 tasks for three image datasets: a handwritten digit
                 dataset, a face dataset and a web image dataset. For
                 the digit images, SW-KProp and its variant performed
                 consistently better than the other methods tested. For
                 the face and web images, SW-KProp outperformed its
                 competitors for the case when the number of prelabeled
                 images was relatively small. The performance was seen
                 to improve significantly when the feature selection
                 strategy was applied.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Mallik:2013:MOR,
  author =       "Anupama Mallik and Hiranmay Ghosh and Santanu
                 Chaudhury and Gaurav Harit",
  title =        "{MOWL}: an ontology representation language for
                 {Web}-based multimedia applications",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "1",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2542205.2542210",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Several multimedia applications need to reason with
                 concepts and their media properties in specific domain
                 contexts. Media properties of concepts exhibit some
                 unique characteristics that cannot be dealt with
                 conceptual modeling schemes followed in the existing
                 ontology representation and reasoning schemes. We have
                 proposed a new perceptual modeling technique for
                 reasoning with media properties observed in multimedia
                 instances and the latent concepts. Our knowledge
                 representation scheme uses a causal model of the world
                 where concepts manifest in media properties with
                 uncertainties. We introduce a probabilistic reasoning
                 scheme for belief propagation across domain concepts
                 through observation of media properties. In order to
                 support the perceptual modeling and reasoning paradigm,
                 we propose a new ontology language, Multimedia Web
                 Ontology Language (MOWL). Our primary contribution in
                 this article is to establish the need for the new
                 ontology language and to introduce the semantics of its
                 novel language constructs. We establish the generality
                 of our approach with two disparate knowledge-intensive
                 applications involving reasoning with media properties
                 of concepts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Deng:2014:DLB,
  author =       "Yunhua Deng and Rynson W. H. Lau",
  title =        "Dynamic load balancing in distributed virtual
                 environments using heat diffusion",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499906",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "Distributed virtual environments (DVEs) are attracting
                 a lot of attention in recent years, due to the
                 increasing popularity of online gaming and social
                 networks. As the number of concurrent users of a DVE
                 increases, a critical problem is on how the workload
                 among multiple servers can be balanced in order to
                 maintain real-time performance. Although a number of
                 load balancing methods have been proposed, they either
                 try to produce high quality load balancing results and
                 become too slow or emphasize on efficiency and the load
                 balancing results become less effective. In this
                 article, we propose a new approach to address this
                 problem based on heat diffusion. Our work has two main
                 contributions. First, we propose a local and a global
                 load balancing methods for DVEs based on heat
                 diffusion. Second, we investigate two performance
                 factors of the proposed methods, the convergence
                 threshold and the load balancing interval. We have
                 conducted a number of experiments to extensively
                 evaluate the performance of the proposed methods. Our
                 experimental results show that the proposed methods
                 outperform existing methods in that our methods are
                 effective in reducing server overloading while at the
                 same time being efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{She:2014:CID,
  author =       "James She and Jon Crowcroft and Hao Fu and Flora Li",
  title =        "Convergence of interactive displays with smart mobile
                 devices for effective advertising: a survey",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2557450",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "The trend of replacing public static signages with
                 digital displays creates opportunities for interactive
                 display systems, which can be used in collaborative
                 workspaces, social gaming platforms and advertising.
                 Based on marketing communication concepts and existing
                 models for consumer behavior, three stages, namely
                 attraction, interaction and conation, are defined in
                 this article to analyze the effectiveness of
                 interactive display advertising. By reviewing various
                 methods and strategies employed by existing systems
                 with attraction, interaction and conation stages, this
                 article concludes that smart mobile devices should be
                 integrated as a component to increase the effectiveness
                 of interactive displays as advertising tools. Future
                 research challenges related to this topic are also
                 discussed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gonina:2014:SMC,
  author =       "Ekaterina Gonina and Gerald Friedland and Eric
                 Battenberg and Penporn Koanantakool and Michael
                 Driscoll and Evangelos Georganas and Kurt Keutzer",
  title =        "Scalable multimedia content analysis on parallel
                 platforms using {Python}",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2517151",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this new era dominated by consumer-produced media
                 there is a high demand for web-scalable solutions to
                 multimedia content analysis. A compelling approach to
                 making applications scalable is to explicitly map their
                 computation onto parallel platforms. However,
                 developing efficient parallel implementations and fully
                 utilizing the available resources remains a challenge
                 due to the increased code complexity, limited
                 portability and required low-level knowledge of the
                 underlying hardware. In this article, we present
                 PyCASP, a Python-based framework that automatically
                 maps computation onto parallel platforms from Python
                 application code to a variety of parallel platforms.
                 PyCASP is designed using a systematic, pattern-oriented
                 approach to offer a single software development
                 environment for multimedia content analysis
                 applications. Using PyCASP, applications can be
                 prototyped in a couple hundred lines of Python code and
                 automatically scale to modern parallel processors.
                 Applications written with PyCASP are portable to a
                 variety of parallel platforms and efficiently scale
                 from a single desktop Graphics Processing Unit (GPU) to
                 an entire cluster with a small change to application
                 code. To illustrate our approach, we present three
                 multimedia content analysis applications that use our
                 framework: a state-of-the-art speaker diarization
                 application, a content-based music recommendation
                 system based on the Million Song Dataset, and a video
                 event detection system for consumer-produced videos. We
                 show that across this wide range of applications, our
                 approach achieves the goal of automatic portability and
                 scalability while at the same time allowing easy
                 prototyping in a high-level language and efficient
                 performance of low-level optimized code.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chandra:2014:HPM,
  author =       "Surendar Chandra and John Boreczky and Lawrence A.
                 Rowe",
  title =        "High performance many-to-many intranet screen sharing
                 with {DisplayCast}",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2534328",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "DisplayCast is a many to many Intranet screen sharing
                 system. Its screen capture mechanism creates a sequence
                 of pixmap images of the screen updates. Prior systems
                 that used a similar approach were designed to operate
                 over constrained wide-area networks and did not exploit
                 the Intranet network conditions to achieve high capture
                 rates. First we empirically analyzed the screen
                 contents for a variety of scenarios. We showed that
                 screen updates were sporadic with long periods of
                 inactivity. When active, screens were updated at far
                 higher rates than was supported by earlier systems. The
                 mismatch was pronounced for interactive scenarios. Even
                 during active screen updates, the number of updated
                 pixels were frequently small. We showed that crucial
                 information can be lost if individual updates were
                 merged. When the available system resources could not
                 support high capture rates, we showed ways in which
                 updates can be effectively collapsed. Next, we
                 investigate compression mechanisms for streaming these
                 updates. Even while using a hardware encoder, lossy
                 compressors such as H.264 were unable to sustain high
                 frame rates. Though Zlib lossless compression operated
                 within the latency and compression rate requirements,
                 the compression efficiency was poor. By analyzing the
                 screen pixels, we developed a practical transformation
                 that significantly improved compression rates.
                 DisplayCast incorporates these observations. It shares
                 the processor and network resources required for screen
                 capture, compression and transmission with host
                 applications whose output needs to be shared.
                 DisplayCast is agile and uses faster processing
                 capability to achieve even higher performance. Our
                 system components operate natively in Windows 7, Mac OS
                 X and iOS and is deployed in a production setting.
                 DisplayCast is released under a New BSD License.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lee:2014:NDH,
  author =       "Ya-Lin Lee and Wen-Hsiang Tsai",
  title =        "A new data hiding method via revision history records
                 on collaborative writing platforms",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2534408",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "A new data hiding method via collaboratively-written
                 articles with forged revision history records on
                 collaborative writing platforms is proposed. The hidden
                 message is camouflaged as a stego-document consisting
                 of a stego-article and a revision history created
                 through a simulated process of collaborative writing.
                 The revisions are forged using a database constructed
                 by mining word sequences used in real cases from an
                 English Wikipedia XML dump. Four characteristics of
                 article revisions are identified and utilized to embed
                 secret messages, including the author of each revision,
                 the number of corrected word sequences, the content of
                 the corrected word sequences, and the word sequences
                 replacing the corrected ones. Related problems arising
                 in utilizing these characteristics for data hiding are
                 identified and solved skillfully, resulting in an
                 effective multiway method for hiding secret messages
                 into the revision history. To create more realistic
                 revisions, Huffman coding based on the word sequence
                 frequencies collected from Wikipedia is applied to
                 encode the word sequences. Good experimental results
                 show the feasibility of the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yuan:2014:MRB,
  author =       "Jin Yuan and Yi-Liang Zhao and Huanbo Luan and Meng
                 Wang and Tat-Seng Chua",
  title =        "Memory recall based video search: Finding videos you
                 have seen before based on your memory",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2534409",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "We often remember images and videos that we have seen
                 or recorded before but cannot quite recall the exact
                 venues or details of the contents. We typically have
                 vague memories of the contents, which can often be
                 expressed as a textual description and/or rough visual
                 descriptions of the scenes. Using these vague memories,
                 we then want to search for the corresponding videos of
                 interest. We call this ``Memory Recall based Video
                 Search'' (MRVS). To tackle this problem, we propose a
                 video search system that permits a user to input
                 his/her vague and incomplete query as a combination of
                 text query, a sequence of visual queries, and/or
                 concept queries. Here, a visual query is often in the
                 form of a visual sketch depicting the outline of scenes
                 within the desired video, while each corresponding
                 concept query depicts a list of visual concepts that
                 appears in that scene. As the query specified by users
                 is generally approximate or incomplete, we need to
                 develop techniques to handle this inexact and
                 incomplete specification by also leveraging on user
                 feedback to refine the specification. We utilize
                 several innovative approaches to enhance the automatic
                 search. First, we employ a visual query suggestion
                 model to automatically suggest potential visual
                 features to users as better queries. Second, we utilize
                 a color similarity matrix to help compensate for
                 inexact color specification in visual queries. Third,
                 we leverage on the ordering of visual queries and/or
                 concept queries to rerank the results by using a greedy
                 algorithm. Moreover, as the query is inexact and there
                 is likely to be only one or few possible answers, we
                 incorporate an interactive feedback loop to permit the
                 users to label related samples which are visually
                 similar or semantically close to the relevant sample.
                 Based on the labeled samples, we then propose
                 optimization algorithms to update visual queries and
                 concept weights to refine the search results. We
                 conduct experiments on two large-scale video datasets:
                 TRECVID 2010 and YouTube. The experimental results
                 demonstrate that our proposed system is effective for
                 MRVS tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2014:MIK,
  author =       "Xianglong Liu and Yadong Mu and Bo Lang and Shih-Fu
                 Chang",
  title =        "Mixed image-keyword query adaptive hashing over
                 multilabel images",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2540990",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "This article defines a new hashing task motivated by
                 real-world applications in content-based image
                 retrieval, that is, effective data indexing and
                 retrieval given mixed query (query image together with
                 user-provided keywords). Our work is distinguished from
                 state-of-the-art hashing research by two unique
                 features: (1) Unlike conventional image retrieval
                 systems, the input query is a combination of an
                 exemplar image and several descriptive keywords, and
                 (2) the input image data are often associated with
                 multiple labels. It is an assumption that is more
                 consistent with the realistic scenarios. The mixed
                 image-keyword query significantly extends traditional
                 image-based query and better explicates the user
                 intention. Meanwhile it complicates semantics-based
                 indexing on the multilabel data. Though several
                 existing hashing methods can be adapted to solve the
                 indexing task, unfortunately they all prove to suffer
                 from low effectiveness. To enhance the hashing
                 efficiency, we propose a novel scheme ``boosted shared
                 hashing''. Unlike prior works that learn the hashing
                 functions on either all image labels or a single label,
                 we observe that the hashing function can be more
                 effective if it is designed to index over an optimal
                 label subset. In other words, the association between
                 labels and hash bits are moderately sparse. The
                 sparsity of the bit-label association indicates greatly
                 reduced computation and storage complexities for
                 indexing a new sample, since only limited number of
                 hashing functions will become active for the specific
                 sample. We develop a Boosting style algorithm for
                 simultaneously optimizing both the optimal label
                 subsets and hashing functions in a unified formulation,
                 and further propose a query-adaptive retrieval
                 mechanism based on hash bit selection for mixed
                 queries, no matter whether or not the query words exist
                 in the training data. Moreover, we show that the
                 proposed method can be easily extended to the case
                 where the data similarity is gauged by nonlinear kernel
                 functions. Extensive experiments are conducted on
                 standard image benchmarks like CIFAR-10, NUS-WIDE and
                 a-TRECVID. The results validate both the sparsity of
                 the bit-label association and the convergence of the
                 proposed algorithm, and demonstrate that the proposed
                 hashing scheme achieves substantially superior
                 performances over state-of-the-art methods under the
                 same hash bit budget.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
%%% NOTE(review): article number 22 (in pages and articleno below) is also
%%% used by Liu:2014:MIK in volume 10, number 2, while the next entry in
%%% number 3 is article 23. Presumably inherited from the publisher's
%%% metadata -- TODO confirm against the publisher's issue listing.
@Article{Anonymous:2014:TCO,
  author =       "Anonymous",
  title =        "Table of Contents: Online Supplement Volume 10, Number
                 1s",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "3",
  pages =        "22:1--22:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2602969",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Apr 15 12:20:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2014:DUB,
  author =       "Ning Liu and Huajie Cui and S.-H. Gary Chan and
                 Zhipeng Chen and Yirong Zhuang",
  title =        "Dissecting User Behaviors for a Simultaneous Live and
                 {VoD IPTV} System",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "3",
  pages =        "23:1--23:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2568194",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Tue Apr 15 12:20:53 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "IPTV services deployed nowadays often consist of both
                 live TV and Video-on-Demand (VoD), offered by the same
                 service provider to the same pool of users over the
                 same managed network. Understanding user behaviors in
                 such a setting is hence an important step for system
                 modelling and optimization. Previous studies on user
                 behavior on video services were on either live TV or
                 VoD. For the first time, we conduct an in-depth
                 large-scale behavior study for IPTV users offering
                 simultaneously live TV and VoD choices at the same
                 time. Our data is from the largest IPTV service
                 provider in China, offering hundreds of live channels
                 and hundreds of thousands of VoD files, with traces
                 covering more than 1.9 million users over a period of 5
                 months. This large dataset provides us a unique
                 opportunity to cross-compare user viewing behaviors for
                 these services on the same platform, and sheds valuable
                 insights on how users interact with such a simultaneous
                 system. Our results lead to new understanding on IPTV
                 user behaviors which have strong implications on system
                 design. For example, we find that the average holding
                 time for VoD is significantly longer than live TV. Live
                 TV users tend to surf more. However, if such channel
                 surfing is discounted, the holding times of both
                 services are not much different. While users in VoD
                 tend to view HD longer, channel popularity for live TV
                 is much less dependent on its video quality. In
                 contrast to some popular assumptions on user
                 interactivity, the transitions among live TV, VoD, and
                 offline modes are far from a Markov model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Multimed Comput. Commun. Appl.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gaeta:2014:DDI,
author = "Rossano Gaeta and Marco Grangetto and Lorenzo Bovio",
title = "{DIP}: {Distributed Identification of Polluters} in
{P2P} Live Streaming",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "24:1--24:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2568223",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Peer-to-peer live streaming applications are
vulnerable to malicious actions of peers that
deliberately modify data to decrease or prevent the
fruition of the media (pollution attack). In this
article we propose DIP, a fully distributed, accurate,
and robust algorithm for the identification of
polluters. DIP relies on checks that are computed by
peers upon completing reception of all blocks composing
a data chunk. A check is a special message that
contains the set of peer identifiers that provided
blocks of the chunk as well as a bit to signal if the
chunk has been corrupted. Checks are periodically
transmitted by peers to their neighbors in the overlay
network; peers receiving checks use them to maintain a
factor graph. This graph is bipartite and an
incremental belief propagation algorithm is run on it
to compute the probability of a peer being a polluter.
Using a prototype deployed over PlanetLab we show by
extensive experimentation that DIP allows honest peers
to identify polluters with very high accuracy and
completeness, even when polluters collude to deceive
them. Furthermore, we show that DIP is efficient,
requiring low computational, communication, and storage
overhead at each peer.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hoque:2014:SEM,
author = "Mohammad Ashraful Hoque and Matti Siekkinen and Jukka
K. Nurminen and Sasu Tarkoma and Mika Aalto",
title = "Saving Energy in Mobile Devices for On-Demand
Multimedia Streaming --- A Cross-Layer Approach",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "25:1--25:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2556942",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article proposes a novel energy-efficient
multimedia delivery system called EStreamer. First, we
study the relationship between buffer size at the
client, burst-shaped TCP-based multimedia traffic, and
energy consumption of wireless network interfaces in
smartphones. Based on the study, we design and
implement EStreamer for constant bit rate and
rate-adaptive streaming. EStreamer can improve battery
lifetime by 3x, 1.5x, and 2x while streaming over
Wi-Fi, 3G, and 4G, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "25",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2014:HEK,
author = "Feng Wang and Wan-Lei Zhao and Chong-Wah Ngo and
Bernard Merialdo",
title = "A {Hamming} Embedding Kernel with Informative
Bag-of-Visual Words for Video Semantic Indexing",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "26:1--26:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2535938",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we propose a novel Hamming embedding
kernel with informative bag-of-visual words to address
two main problems existing in traditional BoW
approaches for video semantic indexing. First, Hamming
embedding is employed to alleviate the information loss
caused by SIFT quantization. The Hamming distances
between keypoints in the same cell are calculated and
integrated into the SVM kernel to better discriminate
different image samples. Second, to highlight the
concept-specific visual information, we propose to
weight the visual words according to their
informativeness for detecting specific concepts. We
show that our proposed kernels can significantly
improve the performance of concept detection.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "26",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2014:MDF,
author = "Ying Yang and Ioannis Ivrissimtzis",
title = "Mesh Discriminative Features for {$3$D} Steganalysis",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "27:1--27:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2535555",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We propose a steganalytic algorithm for triangle
meshes, based on the supervised training of a
classifier by discriminative feature vectors. After a
normalization step, the triangle mesh is calibrated by
one step of Laplacian smoothing and then a feature
vector is computed, encoding geometric information
corresponding to vertices, edges and faces. For a given
steganographic or watermarking algorithm, we create a
training set containing unmarked meshes and meshes
marked by that algorithm, and train a classifier using
Quadratic Discriminant Analysis. The performance of the
proposed method was evaluated on six well-known
watermarking/steganographic schemes with satisfactory
accuracy rates.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "27",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hamam:2014:QEM,
author = "Abdelwahab Hamam and Abdulmotaleb {El Saddik} and
Jihad Alja'am",
title = "A Quality of Experience Model for Haptic Virtual
Environments",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "28:1--28:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2540991",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Haptic-based Virtual Reality (VR) applications have
many merits. What is still obscure, from the designer's
perspective of these applications, is the experience
the users will undergo when they use the VR system.
Quality of Experience (QoE) is an evaluation metric
from the user's perspective that unfortunately has
received limited attention from the research community.
Assessing the QoE of VR applications reflects the
amount of overall satisfaction and benefits gained from
the application in addition to laying the foundation
for ideal user-centric design in the future. In this
article, we propose a taxonomy for the evaluation of
QoE for multimedia applications and in particular VR
applications. We model this taxonomy using a Fuzzy
logic Inference System (FIS) to quantitatively measure
the QoE of haptic virtual environments. We build and
test our FIS by conducting a users' study analysis to
evaluate the QoE of a haptic game application. Our
results demonstrate that the proposed FIS model
reflects the user's estimation of the application's
quality significantly with low error and hence is
suited for QoE evaluation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "28",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Botta:2014:PCI,
author = "Marco Botta and Davide Cavagnino and Victor Pomponiu",
title = "Protecting the Content Integrity of Digital Imagery
with Fidelity Preservation: An Improved Version",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "29:1--29:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2568224",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Fragile watermarking has attracted a lot of attention
in the last decade. An interesting approach, presented
in 2011 by Lin et al., results in very high quality of
the watermarked images. However, after a thorough
examination of the paper, a few improvements are
proposed in our revised version of the algorithm in
order to overcome some shortcomings. In particular,
changes to the pseudocode and modifications to deal
with pixel saturation are suggested, along with a way
to improve the scheme security. Finally, a deeper
analysis of the security is presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "29",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Luo:2014:ICH,
author = "Da Luo and Weiqi Luo and Rui Yang and Jiwu Huang",
title = "Identifying Compression History of Wave Audio and Its
Applications",
journal = j-TOMCCAP,
volume = "10",
number = "3",
pages = "30:1--30:??",
month = apr,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2575978",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Tue Apr 15 12:20:53 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Audio signal is sometimes stored and/or processed in
WAV (waveform) format without any knowledge of its
previous compression operations. To perform some
subsequent processing, such as digital audio forensics,
audio enhancement and blind audio quality assessment,
it is necessary to identify its compression history. In
this article, we will investigate how to identify a
decompressed wave audio that went through one of three
popular compression schemes, including MP3, WMA
(windows media audio) and AAC (advanced audio coding).
By analyzing the corresponding frequency coefficients,
including modified discrete cosine transform (MDCT) and
Mel-frequency cepstral coefficients (MFCCs), of those
original audio clips and their decompressed versions
with different compression schemes and bit rates, we
propose several statistics to identify the compression
scheme as well as the corresponding bit rate previously
used for a given WAV signal. The experimental results
evaluated on 8,800 audio clips with various contents
have shown the effectiveness of the proposed method. In
addition, some potential applications of the proposed
method are discussed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "30",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2014:CDM,
author = "Tianzhu Zhang and Changsheng Xu",
title = "Cross-Domain Multi-Event Tracking via {CO-PMHT}",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "31:1--31:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2602633",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the massive growth of events on the Internet,
efficient organization and monitoring of events becomes
a practical challenge. To deal with this problem, we
propose a novel CO-PMHT (CO-Probabilistic
Multi-Hypothesis Tracking) algorithm for cross-domain
multi-event tracking to obtain their informative
summary details and evolutionary trends over time. We
collect a large-scale dataset by searching keywords on
two domains (Google News and Flickr) and downloading
both images and textual content for an event. Given the
input data, our algorithm can track multiple events in
the two domains collaboratively and boost the tracking
performance. Specifically, the bridge between two
domains is a semantic posterior probability, that
avoids the domain gap. After tracking, we can visualize
the whole evolutionary process of the event over time
and mine the semantic topics of each event for deep
understanding and event prediction. The extensive
experimental evaluations on the collected dataset well
demonstrate the effectiveness of the proposed algorithm
for cross-domain multi-event tracking.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "31",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Huang:2014:PVR,
author = "Qinghua Huang and Bisheng Chen and Jingdong Wang and
Tao Mei",
title = "Personalized Video Recommendation through Graph
Propagation",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "32:1--32:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2598779",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The rapid growth of the number of videos on the
Internet provides enormous potential for users to find
content of interest. However, the vast quantity of
videos also turns the finding process into a difficult
task. In this article, we address the problem of
providing personalized video recommendation for users.
Rather than only exploring the user-video bipartite
graph that is formulated using click information, we
first combine the clicks and queries information to
build a tripartite graph. In the tripartite graph, the
query nodes act as bridges to connect user nodes and
video nodes. Then, to further enrich the connections
between users and videos, three subgraphs between the
same kinds of nodes are added to the tripartite graph
by exploring content-based information (video tags and
textual queries). We propose an iterative propagation
algorithm over the enhanced graph to compute the
preference information of each user. Experiments
conducted on a dataset with 1,369 users, 8,765 queries,
and 17,712 videos collected from a commercial video
search engine demonstrate the effectiveness of the
proposed method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "32",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2014:UVS,
author = "Haitao Li and Xu Cheng and Jiangchuan Liu",
title = "Understanding Video Sharing Propagation in Social
Networks: Measurement and Analysis",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "33:1--33:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2594440",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Modern online social networking has drastically
changed the information distribution landscape.
Recently, video has become one of the most important
types of objects spreading among social networking
service users. The sheer and ever-increasing data
volume, the broader coverage, and the longer access
durations of video objects, however, present
significantly more challenges than other types of
objects. This article takes an initial step toward
understanding the unique characteristics of video
sharing propagation in social networks. Based on
real-world data traces from a large-scale online social
network, we examine the user behavior from diverse
aspects and identify different types of users involved
in video propagation. We closely investigate the
temporal distribution during propagation as well as the
typical propagation structures, revealing more details
beyond stationary coverage. We further extend the
conventional epidemic models to accommodate diverse
types of users and their probabilistic viewing and
sharing behaviors. The model, effectively capturing the
essentials of the propagation process, serves as a
valuable basis for such applications as workload
synthesis, traffic prediction, and resource provision
of video servers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "33",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2014:BCM,
author = "Zhiyu Wang and Peng Cui and Lexing Xie and Wenwu Zhu
and Yong Rui and Shiqiang Yang",
title = "Bilateral Correspondence Model for Words-and-Pictures
Association in Multimedia-Rich Microblogs",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "34:1--34:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2611388",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Nowadays, the amount of multimedia contents in
microblogs is growing significantly. More than 20\% of
microblogs link to a picture or video in certain large
systems. The rich semantics in microblogs provides an
opportunity to endow images with higher-level semantics
beyond object labels. However, this raises new
challenges for understanding the association between
multimodal multimedia contents in multimedia-rich
microblogs. Disobeying the fundamental assumptions of
traditional annotation, tagging, and retrieval systems,
pictures and words in multimedia-rich microblogs are
loosely associated and a correspondence between
pictures and words cannot be established. To address
the aforementioned challenges, we present the first
study analyzing and modeling the associations between
multimodal contents in microblog streams, aiming to
discover multimodal topics from microblogs by
establishing correspondences between pictures and words
in microblogs. We first use a data-driven approach to
analyze the new characteristics of the words, pictures,
and their association types in microblogs. We then
propose a novel generative model called the Bilateral
Correspondence Latent Dirichlet Allocation (BC-LDA)
model. Our BC-LDA model can assign flexible
associations between pictures and words and is able to
not only allow picture-word co-occurrence with
bilateral directions, but also single modal
association. This flexible association can best fit the
data distribution, so that the model can discover
various types of joint topics and generate pictures and
words with the topics accordingly. We evaluate this
model extensively on a large-scale real multimedia-rich
microblogs dataset. We demonstrate the advantages of
the proposed model in several application scenarios,
including image tagging, text illustration, and topic
discovery. The experimental results demonstrate that
our proposed model can significantly and consistently
outperform traditional approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "34",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lei:2014:FND,
author = "Yanqiang Lei and Guoping Qiu and Ligang Zheng and Jiwu
Huang",
title = "Fast Near-Duplicate Image Detection Using Uniform
Randomized Trees",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "35:1--35:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2602186",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Indexing structure plays an important role in the
application of fast near-duplicate image detection,
since it can narrow down the search space. In this
article, we develop a cluster of uniform randomized
trees (URTs) as an efficient indexing structure to
perform fast near-duplicate image detection. The main
contribution in this article is that we introduce
``uniformity'' and ``randomness'' into the indexing
construction. The uniformity requires classifying the
object images into the same scale subsets. Such a
decision makes good use of the two facts in
near-duplicate image detection, namely: (1) the number
of categories is huge; (2) a single category usually
contains only a small number of images. Therefore, the
uniform distribution is very beneficial to narrow down
the search space and does not significantly degrade the
detection accuracy. The randomness is embedded into the
generation of feature subspace and projection
direction, improving the flexibility of indexing
construction. The experimental results show that the
proposed method is more efficient than the popular
locality-sensitive hashing and more stable and flexible
than the traditional KD-tree.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "35",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yeh:2014:PPR,
author = "Che-Hua Yeh and Brian A. Barsky and Ming Ouhyoung",
title = "Personalized Photograph Ranking and Selection System
Considering Positive and Negative User Feedback",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "36:1--36:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2584105",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we propose a novel personalized
ranking system for amateur photographs. The proposed
framework treats the photograph assessment as a ranking
problem and we introduce the idea of personalized
ranking, which ranks photographs considering both their
aesthetic qualities and personal preferences.
Photographs are described using three types of
features: photo composition, color and intensity
distribution, and personalized features. An aesthetic
prediction model is learned from labeled photographs by
using the proposed image features and RBF-ListNet
learning algorithm. The experimental results show that
the proposed framework outperforms in the ranking
performance: a Kendall's tau value of 0.432 is
significantly higher than those obtained by the
features proposed in one of the state-of-the-art
approaches (0.365) and by learning based on support
vector regression (0.384). To realize personalization
in ranking, three approaches are proposed: the
feature-based approach allows users to select
photographs with specific rules, the example-based
approach takes the positive feedback from users to
rerank the photograph, and the list-based approach
takes both positive and negative feedback from users
into consideration. User studies indicate that all
three approaches are effective in both aesthetic and
personalized ranking.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "36",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Tan:2014:PVS,
author = "Song Tan and Yu-Gang Jiang and Chong-Wah Ngo",
title = "Placing Videos on a Semantic Hierarchy for Search
Result Navigation",
journal = j-TOMM,
volume = "10",
number = "4",
pages = "37:1--37:??",
month = jun,
year = "2014",
DOI = "https://doi.org/10.1145/2578394",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 8 11:32:58 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Organizing video search results in a list view is
widely adopted by current commercial search engines,
which cannot support efficient browsing for complex
search topics that have multiple semantic facets. In
this article, we propose to organize video search
results in a highly structured way. Specifically,
videos are placed on a semantic hierarchy that
accurately organizes various facets of a given search
topic. To pick the most suitable videos for each node
of the hierarchy, we define and utilize three important
criteria: relevance, uniqueness, and diversity.
Extensive evaluations on a large YouTube video dataset
demonstrate the effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "37",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Steinmetz:2014:EN,
author = "Ralf Steinmetz",
title = "Editorial Note",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "1:1--1:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2634234",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2014:SBA,
author = "Yong-Jin Liu and Cui-Xia Ma and Qiufang Fu and Xiaolan
Fu and Sheng-Feng Qin and Lexing Xie",
title = "A Sketch-Based Approach for Interactive Organization
of Video Clips",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "2:1--2:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2645643",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the rapid growth of video resources, techniques
for efficient organization of video clips are becoming
appealing in the multimedia domain. In this article, a
sketch-based approach is proposed to intuitively
organize video clips by: (1) enhancing their narrations
using sketch annotations and (2) structurizing the
organization process by gesture-based free-form
sketching on touch devices. There are two main
contributions of this work. The first is a sketch
graph, a novel representation for the narrative
structure of video clips to facilitate content
organization. The second is a method to perform
context-aware sketch recommendation scalable to large
video collections, enabling common users to easily
organize sketch annotations. A prototype system
integrating the proposed approach was evaluated on the
basis of five different aspects concerning its
performance and usability. Two sketch searching
experiments showed that the proposed context-aware
sketch recommendation outperforms, in terms of accuracy
and scalability, two state-of-the-art sketch searching
methods. Moreover, a user study showed that the sketch
graph is consistently preferred over traditional
representations such as keywords and keyframes. The
second user study showed that the proposed approach is
applicable in those scenarios where the video annotator
and organizer were the same person. The third user
study showed that, for video content organization,
using sketch graph users took on average 1/3 less time
than using a mass-market tool Movie Maker and took on
average 1/4 less time than using a state-of-the-art
sketch alternative. These results demonstrated that the
proposed sketch graph approach is a promising video
organization tool.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Huang:2014:CSA,
author = "Junshi Huang and Si Liu and Junliang Xing and Tao Mei
and Shuicheng Yan",
title = "Circle \& Search: Attribute-Aware Shoe Retrieval",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "3:1--3:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632165",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Taking the shoe as a concrete example, we present an
innovative product retrieval system that leverages
object detection and retrieval techniques to support a
brand-new online shopping experience in this article.
The system, called Circle \& Search, enables users to
naturally indicate any preferred product by simply
circling the product in images as the visual query, and
then returns visually and semantically similar products
to the users. The system is characterized by
introducing attributes in both the detection and
retrieval of the shoe. Specifically, we first develop
an attribute-aware part-based shoe detection model. By
maintaining the consistency between shoe parts and
attributes, this shoe detector has the ability to model
high-order relations between parts and thus the
detection performance can be enhanced. Meanwhile, the
attributes of this detected shoe can also be predicted
as the semantic relations between parts. Based on the
result of shoe detection, the system ranks all the
shoes in the repository using an attribute refinement
retrieval model that takes advantage of query-specific
information and attribute correlation to provide an
accurate and robust shoe retrieval. To evaluate this
retrieval system, we build a large dataset with 17,151
shoe images, in which each shoe is annotated with 10
shoe attributes, e.g., heel height, heel shape, sole
shape, etc. According to the experimental result and
the user study, our Circle \& Search system achieves
promising shoe retrieval performance and thus
significantly improves the users' online shopping
experience.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "3",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Guan:2014:TAV,
author = "Genliang Guan and Zhiyong Wang and Shaohui Mei and Max
Ott and Mingyi He and David Dagan Feng",
title = "A Top-Down Approach for Video Summarization",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "4:1--4:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632267",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "While most existing video summarization approaches aim
to identify important frames of a video from either a
global or local perspective, we propose a top-down
approach consisting of scene identification and scene
summarization. For scene identification, we represent
each frame with global features and utilize a scalable
clustering method. We then formulate scene
summarization as choosing those frames that best cover
a set of local descriptors with minimal redundancy. In
addition, we develop a visual word-based approach to
make our approach more computationally scalable.
Experimental results on two benchmark datasets
demonstrate that our proposed approach clearly
outperforms the state-of-the-art.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "4",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Pazzi:2014:PPP,
author = "Richard W. Pazzi and Azzedine Boukerche",
title = "{PROPANE}: a Progressive Panorama Streaming Protocol
to Support Interactive {$3$D} Virtual Environment
Exploration on Graphics-Constrained Devices",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "5:1--5:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2602222",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Image-Based Rendering (IBR) has become widely known by
its relatively low requirements for generating new
scenes based on a sequence of reference images. This
characteristic of IBR shows a remarkable potential
impact in rendering complex 3D virtual environments on
graphics-constrained devices, such as head-mounted
displays, set-top boxes, media streaming devices, and
so on. If well exploited, IBR coupled with remote
rendering would enable the exploration of complex
virtual environments on these devices. However, remote
rendering requires the transmission of a large volume
of images. In addition, existing solutions consider
limited and/or deterministic navigation schemes as a
means of decreasing the volume of streamed data. This
article proposes the PROgressive PANorama StrEaming
protocol (PROPANE) to offer users a smoother virtual
navigation experience by prestreaming the imagery data
required to generate new views as the user wanders
within a 3D environment. PROPANE is based on a very
simple yet effective trigonometry model and uses a
strafe (lateral movement) technique to minimize the
delay between image updates at the client end. This
article introduces the concept of key partial
panoramas, namely panorama segments that cover
movements in any direction by simply strafing from an
appropriate key partial panorama and streaming the
amount of lost pixels. Therefore, PROPANE can provide a
constrained device with sufficient imagery data to
cover a future user's viewpoints, thereby minimizing
the impact of transmission delay and jitter. PROPANE
has been implemented and compared to two baseline
remote rendering schemes. The evaluation results show
that the proposed technique outperforms the selected
and closely related existing schemes by minimizing the
response time while not limiting the user to predefined
paths as opposed to previous protocols.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "5",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2014:FEM,
author = "Xiangyu Wang and Yong Rui and Mohan Kankanhalli",
title = "{Up-Fusion}: an Evolving Multimedia Fusion Method",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "6:1--6:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2611777",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The amount of multimedia data on the Internet has
increased exponentially in the past few decades and
this trend is likely to continue. Multimedia content
inherently has multiple information sources, therefore
effective fusion methods are critical for data analysis
and understanding. So far, most of the existing fusion
methods are static with respect to time, making it
difficult for them to handle the evolving multimedia
content. To address this issue, in recent years,
several evolving fusion methods were proposed, however,
their requirements are difficult to meet, making them
useful only in limited applications. In this article,
we propose a novel evolving fusion method based on the
online portfolio selection theory. The proposed method
takes into account the correlation among different
information sources and evolves the fusion model when
new multimedia data is added. It performs effectively
on both crisp and soft decisions without requiring
additional context information. Extensive experiments
on concept detection and human detection tasks over the
TRECVID dataset and surveillance data have been
conducted and significantly better performance has been
obtained.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "6",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2014:EIP,
author = "Xinxi Wang and Yi Wang and David Hsu and Ye Wang",
title = "Exploration in Interactive Personalized Music
Recommendation: a Reinforcement Learning Approach",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "7:1--7:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2623372",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Current music recommender systems typically act in a
greedy manner by recommending songs with the highest
user ratings. Greedy recommendation, however, is
suboptimal over the long term: it does not actively
gather information on user preferences and fails to
recommend novel songs that are potentially interesting.
A successful recommender system must balance the needs
to explore user preferences and to exploit this
information for recommendation. This article presents a
new approach to music recommendation by formulating
this exploration-exploitation trade-off as a
reinforcement learning task. To learn user preferences,
it uses a Bayesian model that accounts for both audio
content and the novelty of recommendations. A
piecewise-linear approximation to the model and a
variational inference algorithm help to speed up
Bayesian inference. One additional benefit of our
approach is a single unified model for both music
recommendation and playlist generation. We demonstrate
the strong potential of the proposed approach with
simulation results and a user study.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "7",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Katti:2014:OEE,
author = "Harish Katti and Anoop Kolar Rajagopal and Mohan
Kankanhalli and Ramakrishnan Kalpathi",
title = "Online Estimation of Evolving Human Visual Interest",
journal = j-TOMM,
volume = "11",
number = "1",
pages = "8:1--8:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632284",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Mon Sep 1 12:38:22 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Regions in video streams attracting human interest
contribute significantly to human understanding of the
video. Being able to predict salient and informative
Regions of Interest (ROIs) through a sequence of eye
movements is a challenging problem. Applications such
as content-aware retargeting of videos to different
aspect ratios while preserving informative regions and
smart insertion of dialog (closed-caption text)
into the video stream can significantly be improved
using the predicted ROIs. We propose an interactive
human-in-the-loop framework to model eye movements and
predict visual saliency into yet-unseen frames. Eye
tracking and video content are used to model visual
attention in a manner that accounts for important
eye-gaze characteristics such as temporal
discontinuities due to sudden eye movements, noise, and
behavioral artifacts. A novel statistical- and
algorithm-based method gaze buffering is proposed for
eye-gaze analysis and its fusion with content-based
features. Our robust saliency prediction is
instantiated for two challenging and exciting
applications. The first application alters video aspect
ratios on-the-fly using content-aware video
retargeting, thus making them suitable for a variety of
display sizes. The second application dynamically
localizes active speakers and places dialog captions
on-the-fly in the video stream. Our method ensures that
dialogs are faithful to active speaker locations and do
not interfere with salient content in the video stream.
Our framework naturally accommodates personalisation of
the application to suit biases and preferences of
individual users.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "8",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghinea:2014:ISI,
author = "Gheorghita Ghinea and Christian Timmerer and Weisi Lin
and Stephen Gulliver",
title = "Introduction to Special Issue on Multiple Sensorial
{(MulSeMedia)} Multimodal Media: Advances and
Applications",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "9:1--9:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661333",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "9",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lv:2014:MHF,
author = "Zhihan Lv and Alaa Halawani and Shengzhong Feng and
Haibo Li and Shafiq Ur R{\'e}hman",
title = "Multimodal Hand and Foot Gesture Interaction for
Handheld Devices",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "10:1--10:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2645860",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We present a hand-and-foot-based multimodal
interaction approach for handheld devices. Our method
combines input modalities (i.e., hand and foot) and
provides a coordinated output to both modalities along
with audio and video. Human foot gesture is detected
and tracked using contour-based template detection
(CTD) and Tracking-Learning-Detection (TLD) algorithm.
3D foot pose is estimated from passive homography
matrix of the camera. 3D stereoscopic and vibrotactile
are used to enhance the immersive feeling. We developed
a multimodal football game based on the multimodal
approach as a proof-of-concept. We confirm our system's
user satisfaction through a user study.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "10",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Prasad:2014:DVC,
author = "Manoj Prasad and Murat Russell and Tracy A. Hammond",
title = "Designing Vibrotactile Codes to Communicate Verb
Phrases",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "11:1--11:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2637289",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Soldiers, to guard themselves from enemy assault, have
to maintain visual and auditory awareness of their
environment. Their visual and auditory senses are thus
saturated. This makes these channels less usable for
communication. The tactile medium of communication with
users is appropriate for displaying information in such
situations. Research in interpersonal communication
among soldiers shows that the most common form of
communication between soldiers involves the use of verb
phrases. In this article, we have developed a
three-by-three tactile display and proposed a method
for mapping the components of a verb phrase to two
dimensions of tactile codes---shape and waveform.
Perception of tactile codes by users depends on the
ability of users to distinguish shape and waveform of
the code. We have proposed a measure to rate the
distinguishability of any two shapes and created a
graph-based user-centric model using this measure to
select distinguishable shapes from a set of all
presentable shapes. We conducted two user studies to
evaluate the ability of users to perceive tactile
information. The results from our first study showed
users' ability to perceive tactile shapes, tactile
waveforms, and form verb phrases from tactile codes.
The recognition accuracy and time taken to distinguish
were better when the shapes were selected from the
graph model than when shapes were chosen based on
intuition. The second user study was conducted to test
the performance of users while performing a primary
visual task simultaneously with a secondary audio or
haptic task. Users were more familiar with perceiving
information from an auditory medium than from a haptic
medium, which was reflected in their performance. Thus
the performance of users in the primary visual task was
better while using an audio medium of communication
than while using a haptic medium of communication.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "11",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Murray:2014:MSE,
author = "Niall Murray and Brian Lee and Yuansong Qiao and
Gabriel-Miro Muntean",
title = "Multiple-Scent Enhanced Multimedia Synchronization",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "12:1--12:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2637293",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This study looked at users' perception of interstream
synchronization between audiovisual media and two
olfactory streams. The ability to detect skews and the
perception and impact of skews on user Quality of
Experience (QoE) is analyzed. The olfactory streams are
presented with the same skews (i.e., delay) and with
variable skews (i.e., jitter and mix of scents). This
article reports the limits beyond which
desynchronization reduces user-perceived quality
levels. Also, a minimum gap between the presentations
of consecutive scents is identified, necessary to
ensuring enhanced user-perceived quality. There is no
evidence (not considering scent type) that overlapping
or mixing of scents increases user QoE levels for
olfaction-enhanced multimedia.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "12",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Kroupi:2014:ECP,
author = "Eleni Kroupi and Ashkan Yazdani and Jean-Marc Vesin
and Touradj Ebrahimi",
title = "{EEG} Correlates of Pleasant and Unpleasant Odor
Perception",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "13:1--13:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2637287",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Olfaction-enhanced multimedia experience is becoming
vital for strengthening the sensation of reality and
the quality of user experience. One approach to
investigate olfactory perception is to analyze the
alterations in brain activity during stimulation with
different odors. In this article, the changes in the
electroencephalogram (EEG) when perceiving
hedonically-different odors are studied. Results of
within and across-subject analysis are presented. We
show that EEG-based odor classification using brain
activity is possible and can be used to automatically
recognize odor pleasantness when a subject-specific
classifier is trained. However, it is a challenging
problem to design a generic classifier.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "13",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Rainer:2014:GUM,
author = "Benjamin Rainer and Christian Timmerer",
title = "A Generic Utility Model Representing the Quality of
Sensory Experience",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "14:1--14:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2648429",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Current QoE research is mainly focusing on single
modalities (audio, visual) or combinations thereof. In
our research, we propose annotating traditional
multimedia content with additional sensory effects,
such as ambient light, vibration, wind, and olfaction,
which could potentially stimulate all human senses.
Investigating the influence of individual sensory
effects and combinations thereof is important in order
to understand how these individual sensory effects
influence the Quality of Experience (QoE) as a whole.
In this article, we describe the results of such a
subjective quality assessment of audio-visual sequences
which are annotated with additional sensory effects
such as ambient light, wind, and vibration using the
MPEG-V standard. The results of this assessment allow
us to derive a utility model representing the Quality
of Sensory Experience (QuaSE) complementary to existing
QoE models described in terms of Quality of Service
(QoS) parameters. For validating our proposed utility
model, we provide an example instantiation and validate
it against results of subjective quality assessments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "14",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yuan:2014:UQE,
author = "Zhenhui Yuan and Shengyang Chen and Gheorghita Ghinea
and Gabriel-Miro Muntean",
title = "User Quality of Experience of Mulsemedia
Applications",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "15:1--15:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661329",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "User Quality of Experience (QoE) is of fundamental
importance in multimedia applications and has been
extensively studied for decades. However, user QoE in
the context of the emerging multiple-sensorial media
(mulsemedia) services, which involve different media
components than the traditional multimedia
applications, has not been comprehensively studied.
This article presents the results of subjective tests
which have investigated user perception of mulsemedia
content. In particular, the impact of intensity of
certain mulsemedia components including haptic and
airflow on user-perceived experience is studied.
Results demonstrate that by making use of mulsemedia
the overall user enjoyment levels increased by up to
77\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "15",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Luque:2014:IMS,
author = "Francisco Pedro Luque and Iris Galloso and Claudio
Feijoo and Carlos Alberto Mart{\'\i}n and Guillermo
Cisneros",
title = "Integration of Multisensorial Stimuli and Multimodal
Interaction in a Hybrid {$3$DTV} System",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "16:1--16:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2617992",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article proposes the integration of
multisensorial stimuli and multimodal interaction
components into a sports multimedia asset under two
dimensions: immersion and interaction. The first
dimension comprises a binaural audio system and a set
of sensory effects synchronized with the audiovisual
content, whereas the second explores interaction
through the insertion of interactive 3D objects into
the main screen and on-demand presentation of
additional information in a second touchscreen. We
present an end-to-end solution integrating these
components into a hybrid (internet-broadcast)
television system using current 3DTV standards. Results
from an experimental study analyzing the perceived
quality of these stimuli and their influence on the
Quality of Experience are presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "16",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ghinea:2014:MSA,
author = "Gheorghita Ghinea and Christian Timmerer and Weisi Lin
and Stephen R. Gulliver",
title = "Mulsemedia: State of the Art, Perspectives, and
Challenges",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "17:1--17:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2617994",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Mulsemedia---multiple sensorial media---captures a wide
variety of research efforts and applications. This
article presents a historic perspective on mulsemedia
work and reviews current developments in the area.
These take place across the traditional multimedia
spectrum---from virtual reality applications to computer
games---as well as efforts in the arts, gastronomy, and
therapy, to mention a few. We also describe
standardization efforts, via the MPEG-V standard, and
identify future developments and exciting challenges
the community needs to overcome.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "17",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zha:2014:ISI,
author = "Zheng-Jun Zha and Lei Zhang and Max M{\"u}hlh{\"a}user
and Alan F. Smeaton",
title = "Introduction to the Special Issue Best Papers of {ACM
Multimedia 2013}",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "18:1--18:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661331",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "18",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Fang:2014:DGI,
author = "Quan Fang and Jitao Sang and Changsheng Xu",
title = "Discovering Geo-Informative Attributes for Location
Recognition and Exploration",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "19:1--19:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2648581",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article considers the problem of automatically
discovering geo-informative attributes for location
recognition and exploration. The attributes are
expected to be both discriminative and representative,
which correspond to certain distinctive visual patterns
and associate with semantic interpretations. For our
solution, we analyze the attribute at the region level.
Each segmented region in the training set is assigned a
binary latent variable indicating its discriminative
capability. A latent learning framework is proposed for
discriminative region detection and geo-informative
attribute discovery. Moreover, we use user-generated
content to obtain the semantic interpretation for the
discovered visual attributes. Discriminative and
search-based attribute annotation methods are developed
for geo-informative attribute interpretation. The
proposed approach is evaluated on one challenging
dataset including GoogleStreetView and Flickr photos.
Experimental results show that (1) geo-informative
attributes are discriminative and useful for location
recognition; (2) the discovered semantic interpretation
is meaningful and can be exploited for further location
exploration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "19",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Liu:2014:WYB,
author = "Luoqi Liu and Junliang Xing and Si Liu and Hui Xu and
Xi Zhou and Shuicheng Yan",
title = "{``Wow! You Are So Beautiful Today!''}",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "20:1--20:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2659234",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Beauty e-Experts, a fully automatic system for
makeover recommendation and synthesis, is developed in
this work. The makeover recommendation and synthesis
system simultaneously considers many kinds of makeover
items on hairstyle and makeup. Given a user-provided
frontal face image with short/bound hair and no/light
makeup, the Beauty e-Experts system not only recommends
the most suitable hairdo and makeup, but also
synthesizes the virtual hairdo and makeup effects. To
acquire enough knowledge for beauty modeling, we built
the Beauty e-Experts Database, which contains 1,505
female photos with a variety of attributes annotated
with different discrete values. We organize these
attributes into two different categories, beauty
attributes and beauty-related attributes. Beauty
attributes refer to those values that are changeable
during the makeover process and thus need to be
recommended by the system. Beauty-related attributes
are those values that cannot be changed during the
makeup process but can help the system to perform
recommendation. Based on this Beauty e-Experts Dataset,
two problems are addressed for the Beauty e-Experts
system: what to recommend and how to wear it, which
describes a similar process of selecting hairstyle and
cosmetics in daily life. For the what-to-recommend
problem, we propose a multiple tree-structured
supergraph model to explore the complex relationships
among high-level beauty attributes, mid-level
beauty-related attributes, and low-level image
features. Based on this model, the most compatible
beauty attributes for a given facial image can be
efficiently inferred. For the how-to-wear-it problem,
an effective and efficient facial image synthesis
module is designed to seamlessly synthesize the
recommended makeovers into the user facial image. We
have conducted extensive experiments on testing images
of various conditions to evaluate and analyze the
proposed system. The experimental results well
demonstrate the effectiveness and efficiency of the
proposed system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "20",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2014:AAS,
author = "Hanwang Zhang and Zheng-Jun Zha and Yang Yang and
Shuicheng Yan and Yue Gao and Tat-Seng Chua",
title = "Attribute-Augmented Semantic Hierarchy: Towards a
Unified Framework for Content-Based Image Retrieval",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "21:1--21:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2637291",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article presents a novel attribute-augmented
semantic hierarchy (A$^2$SH) and demonstrates its
effectiveness in bridging both the semantic and
intention gaps in content-based image retrieval (CBIR).
A$^2$SH organizes semantic concepts into multiple
semantic levels and augments each concept with a set of
related attributes. The attributes are used to describe
the multiple facets of the concept and act as the
intermediate bridge connecting the concept and
low-level visual content. A hierarchical semantic
similarity function is learned to characterize the
semantic similarities among images for retrieval. To
better capture user search intent, a hybrid feedback
mechanism is developed, which collects hybrid feedback
on attributes and images. This feedback is then used to
refine the search results based on A$^2$SH. We use
A$^2$SH as a basis to develop a unified content-based
image retrieval system. We conduct extensive
experiments on a large-scale dataset of over one
million Web images. Experimental results show that the
proposed A$^2$SH can characterize the semantic
affinities among images accurately and can shape user
search intent quickly, leading to more accurate search
results as compared to state-of-the-art CBIR
solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "21",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhao:2014:SSS,
author = "Xin Zhao and Xue Li and Chaoyi Pang and Quan Z. Sheng
and Sen Wang and Mao Ye",
title = "Structured Streaming Skeleton --- A New Feature for
Online Human Gesture Recognition",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "22:1--22:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2648583",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Online human gesture recognition has a wide range of
applications in computer vision, especially in
human-computer interaction applications. The recent
introduction of cost-effective depth cameras brings a
new trend of research on body-movement gesture
recognition. However, there are two major challenges:
(i) how to continuously detect gestures from
unsegmented streams, and (ii) how to differentiate
different styles of the same gesture from other types
of gestures. In this article, we solve these two
problems with a new effective and efficient feature
extraction method---Structured Streaming Skeleton
(SSS)---which uses a dynamic matching approach to
construct a feature vector for each frame. Our
comprehensive experiments on MSRC-12 Kinect Gesture,
Huawei/3DLife-2013, and MSR-Action3D datasets have
demonstrated superior performances than the
state-of-the-art approaches. We also demonstrate model
selection based on the proposed SSS feature, where the
classifier of squared loss regression with $\ell_{2,1}$
norm regularization is a recommended classifier for
best performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "22",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Carbunar:2014:EFN,
author = "Bogdan Carbunar and Rahul Potharaju and Michael Pearce
and Venugopal Vasudevan and Michael Needham",
title = "Errata for: {A Framework for Network Aware Caching for
Video on Demand Systems}",
journal = j-TOMM,
volume = "11",
number = "1s",
pages = "23:1--23:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661298",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Oct 3 12:44:25 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
note = "See \cite{Carbunar:2013:FNA}.",
abstract = "Some errors were introduced into this article in the
preparation of the final source files. The errors are
summarized in the following text and revised pages with
the corrected elements indicated in red are provided.
The full corrected article can be accessed in the ACM
DL, DOI https://doi.org/10.1145/2501643.2501652. Page
8: New Figure 6(a). Page 16: New Figures 8(a), 8(b),
and 9(a). Page 17: New Figure 10(b). Page 18: New
Figures 11 and 12; corrected text reference. Page 19:
Final sentence deleted.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "23",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2014:AGS,
author = "Ying Zhang and Luming Zhang and Roger Zimmermann",
title = "Aesthetics-Guided Summarization from Multiple User
Generated Videos",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "24:1--24:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2659520",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In recent years, with the rapid development of camera
technology and portable devices, we have witnessed a
flourish of user generated videos, which are gradually
reshaping the traditional professional video oriented
media market. The volume of user generated videos in
repositories is increasing at a rapid rate. In today's
video retrieval systems, a simple query will return
many videos which seriously increase the viewing
burden. To manage these video retrievals and provide
viewers with an efficient way to browse, we introduce a
system to automatically generate a summarization from
multiple user generated videos and present their
salience to viewers in an enjoyable manner. Among
multiple consumer videos, we find their qualities to be
highly diverse due to various factors such as a
photographer's experience or environmental conditions
at the time of capture. Such quality inspires us to
include a video quality evaluation component into the
video summarization since videos with poor qualities
can seriously degrade the viewing experience. We first
propose a probabilistic model to evaluate the aesthetic
quality of each user generated video. This model
compares the rich aesthetics information from several
well-known photo databases with generic unlabeled
consumer videos, under a human perception component
indicating the correlation between a video and its
constituting frames. Subjective studies were carried
out with the results indicating that our method is
reliable. Then a novel graph-based formulation is
proposed for the multi-video summarization task.
Desirable summarization criteria are incorporated as the
graph attributes and the problem is solved through a
dynamic programming framework. Comparisons with several
state-of-the-art methods demonstrate that our algorithm
performs better than other methods in generating a
skimming video in preserving the essential scenes from
the original multiple input videos, with smooth
transitions among consecutive segments and appealing
aesthetics overall.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "24",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Calagari:2014:AAL,
author = "Kiana Calagari and Mohammad Reza Pakravan and Shervin
Shirmohammadi and Mohamed Hefeeda",
title = "{ALP}: Adaptive Loss Protection Scheme with Constant
Overhead for Interactive Video Applications",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "25:1--25:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656203",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "There has been an increasing demand for interactive
video transmission over the Internet for applications
such as video conferencing, video calls, and
telepresence applications. These applications are
increasingly moving towards providing High Definition
(HD) video quality to users. A key challenge in these
applications is to preserve the quality of video when
it is transported over best-effort networks that do not
guarantee lossless transport of video packets. In such
conditions, it is important to protect the transmitted
video by using intelligent and adaptive protection
schemes. Applications such as HD video conferencing
require live interaction among participants, which
limits the overall delay the system can tolerate.
Therefore, the protection scheme should add little or
no extra delay to video transport. We propose a novel
Adaptive Loss Protection (ALP) scheme for interactive
HD video applications such as video conferencing and
video chats. This scheme adds negligible delay to the
transmission process and is shown to achieve better
quality than other schemes in lossy networks. The
proposed ALP scheme adaptively applies four different
protection modes to cope with the dynamic network
conditions, which results in high video quality in all
network conditions. Our ALP scheme consists of four
protection modes; each of these modes utilizes a
protection method. Two of the modes rely on the
state-of-the-art protection methods, and we propose a
new Integrated Loss Protection (ILP) method for the
other two modes. In the ILP method we integrate three
factors for distributing the protection among packets.
These three factors are error propagation, region of
interest and header information. In order to decide
when to switch between the protection modes, a new
metric is proposed based on the effectiveness of each
mode in performing protection, rather than just
considering network statistics such as packet loss
rate. Results show that by using this metric not only
the overall quality will be improved but also the
variance of quality will decrease. One of the main
advantages of the proposed ALP scheme is that it does
not increase the bit rate overhead in poor network
conditions. Our results show a significant gain in
video quality: up to 3 dB PSNR improvement is achieved
using our scheme, compared to protecting all packets
equally with the same amount of overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "25",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ren:2014:BGO,
author = "Dongni Ren and Yisheng Xu and S.-H. Gary Chan",
title = "Beyond {1Mbps} Global Overlay Live Streaming: The Case
of Proxy Helpers",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "26:1--26:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2652485",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In order to provide live streaming over the global
Internet, a content provider often deploys an overlay
network consisting of distributed proxies placed close
to user pools. Streaming of multi-Mbps video over such
an overlay is challenging because of bandwidth
bottlenecks in paths. To effectively overcome these
bottlenecks, we consider employing proxy helpers in the
overlay to provide rich path diversity. The helpers do
not have any attached users, and hence may forward
partial video streams (or not at all) if necessary. In
this way, the helpers serve as stepping stones to
supply full streams to the servers. The issue is how to
involve the helpers in the overlay to achieve low
streaming delay meeting a certain high streaming
bitrate requirement. To address the issue, we first
formulate the problem which captures various delay and
bandwidth components, and show that it is NP-hard. We
then propose an efficient algorithm called
Stepping-Stones (SS) which can be efficiently
implemented in a controller. Given the encouraging
simulation results, we develop a novel streaming
testbed for SS and explore, through sets of Internet
experiments, the effectiveness of helpers to achieve
high bitrate (multi-Mbps) global live streaming. In our
experiments, proxies are deployed with a reasonably
wide global footprint. We collect more than a hundred
hours of streaming traces with bitrate ranging from
500kbps to a few Mbps. Our experimental data validates
that helpers indeed play an important role in achieving
high bitrate in today's Internet. Global multi-Mbps
streaming is possible due to their multihop and
multipath advantages. Our experimental trials and data
also provide valuable insights on the design of a
global push-based streaming network. There are strong
benefits of using proxy helpers to achieve high bitrate
and low delay.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "26",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Qian:2014:SEC,
author = "Shengsheng Qian and Tianzhu Zhang and Changsheng Xu
and M. Shamim Hossain",
title = "Social Event Classification via Boosted Multimodal
Supervised Latent {Dirichlet} Allocation",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "27:1--27:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2659521",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the rapidly increasing popularity of social media
sites (e.g., Flickr, YouTube, and Facebook), it is
convenient for users to share their own comments on
many social events, which successfully facilitates
social event generation, sharing and propagation and
results in a large amount of user-contributed media
data (e.g., images, videos, and text) for a wide
variety of real-world events of different types and
scales. As a consequence, it has become more and more
difficult to exactly find the interesting events from
massive social media data, which is useful to browse,
search and monitor social events by users or
governments. To deal with these issues, we propose a
novel boosted multimodal supervised Latent Dirichlet
Allocation (BMM-SLDA) for social event classification
by integrating a supervised topic model, denoted as
multi-modal supervised Latent Dirichlet Allocation
(mm-SLDA), in the boosting framework. Our proposed
BMM-SLDA has a number of advantages. (1) Our mm-SLDA
can effectively exploit the multimodality and the
multiclass property of social events jointly, and make
use of the supervised category label information to
classify multiclass social event directly. (2) It is
suitable for large-scale data analysis by utilizing
boosting weighted sampling strategy to iteratively
select a small subset of data to efficiently train the
corresponding topic models. (3) It effectively exploits
social event structure by the document weight
distribution with classification error and can
iteratively learn new topic model to correct the
previously misclassified event documents. We evaluate
our BMM-SLDA on a real world dataset and show extensive
experimental results, which demonstrate that our model
outperforms state-of-the-art methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "27",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Ye:2014:OBL,
author = "Jun Ye and Kien A. Hua",
title = "Octree-Based {$3$D} Logic and Computation of Spatial
Relationships in Live Video Query Processing",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "28:1--28:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2645864",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Live video computing (LVC) on distributed smart
cameras has many important applications; and a database
approach based on a Live Video DataBase Management
System (LVDBMS) has been shown to be effective for general
LVC application development. The performance of such a
database system relies on accurate interpretation of
spatial relationships among objects in the live video.
With the popularity of affordable depth cameras, 3D
spatial computation techniques have been applied.
However, the 3D object models currently used are
expensive to compute, and offer limited scalability. We
address this drawback in this article by proposing an
octree-based 3D spatial logic and presenting algorithms
for computing 3D spatial relationships using depth
cameras. To support continuous query processing on live
video streams, we also develop a GPU-based
implementation of the proposed technique to further
enhance scalability for real-time applications.
Extensive performance studies based on a public RGB-D
dataset as well as the LVDBMS prototype demonstrate
the correctness and efficiency of our techniques.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "28",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yin:2014:STT,
author = "Yifang Yin and Zhijie Shen and Luming Zhang and Roger
Zimmermann",
title = "Spatial-Temporal Tag Mining for Automatic Geospatial
Video Annotation",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "29:1--29:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2658981",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Videos are increasingly geotagged and used in
practical and powerful GIS applications. However, video
search and management operations are typically
supported by manual textual annotations, which are
subjective and laborious. Therefore, research has been
conducted to automate or semi-automate this process.
Since a diverse vocabulary for video annotations is of
paramount importance towards good search results, this
article proposes to leverage crowdsourced data from
social multimedia applications that host tags of
diverse semantics to build a spatio-temporal tag
repository, consequently acting as input to our
auto-annotation approach. In particular, to build the
tag store, we retrieve the necessary data from several
social multimedia applications, mine both the spatial
and temporal features of the tags, and then refine and
index them accordingly. To better integrate the tag
repository, we extend our previous approach by
leveraging the temporal characteristics of videos as
well. Moreover, we set up additional ranking criteria
on the basis of tag similarity, popularity and location
bias. Experimental results demonstrate that, by making
use of such a tag repository, the generated tags have a
wide range of semantics, and the resulting rankings are
more consistent with human perception.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "29",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2014:LAM,
author = "Chih-Wei Lin and Kuan-Wen Chen and Shen-Chi Chen and
Cheng-Wu Chen and Yi-Ping Hung",
title = "Large-Area, Multilayered, and High-Resolution Visual
Monitoring Using a Dual-Camera System",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "30:1--30:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2645862",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Large-area, high-resolution visual monitoring systems
are indispensable in surveillance applications. To
construct such systems, high-quality image capture and
display devices are required. Whereas high-quality
displays have rapidly developed, as exemplified by the
announcement of the 85-inch 4K ultrahigh-definition TV
by Samsung at the 2013 Consumer Electronics Show (CES),
high-resolution surveillance cameras have progressed
slowly and remain not widely used compared with
displays. In this study, we designed an innovative
framework, using a dual-camera system comprising a
wide-angle fixed camera and a high-resolution
pan-tilt-zoom (PTZ) camera to construct a large-area,
multilayered, and high-resolution visual monitoring
system that features multiresolution monitoring of
moving objects. First, we developed a novel calibration
approach to estimate the relationship between the two
cameras and calibrate the PTZ camera. The PTZ camera
was calibrated based on the consistent property of
distinct pan-tilt angle at various zooming factors,
accelerating the calibration process without affecting
accuracy; this calibration process has not been
reported previously. After calibrating the dual-camera
system, we used the PTZ camera and synthesized a
large-area and high-resolution background image. When
foreground targets were detected in the images captured
by the wide-angle camera, the PTZ camera was controlled
to continuously track the user-selected target. Last,
we integrated preconstructed high-resolution background
and low-resolution foreground images captured using the
wide-angle camera and the high-resolution foreground
image captured using the PTZ camera to generate a
large-area, multilayered, and high-resolution view of
the scene.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "30",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Deng:2014:TFP,
author = "Zhengyu Deng and Ming Yan and Jitao Sang and
Changsheng Xu",
title = "{Twitter} is Faster: Personalized Time-Aware Video
Recommendation from {Twitter} to {YouTube}",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "31:1--31:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2637285",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Traditional personalized video recommendation methods
focus on utilizing user profile or user history
behaviors to model user interests, which follows a
static strategy and fails to capture the swift shift of
the short-term interests of users. According to our
cross-platform data analysis, the information emergence
and propagation is faster in social textual
stream-based platforms than that in multimedia sharing
platforms at micro user level. Inspired by this, we
propose a dynamic user modeling strategy to tackle
personalized video recommendation issues in the
multimedia sharing platform YouTube, by transferring
knowledge from the social textual stream-based platform
Twitter. In particular, the cross-platform video
recommendation strategy is divided into two steps. (1)
Real-time hot topic detection: the hot topics that
users are currently following are extracted from users'
tweets, which are utilized to obtain the related videos
in YouTube. (2) Time-aware video recommendation: for
the target user in YouTube, the obtained videos are
ranked by considering the user profile in YouTube, time
factor, and quality factor to generate the final
recommendation list. In this way, the short-term (hot
topics) and long-term (user profile) interests of users
are jointly considered. Carefully designed experiments
have demonstrated the advantages of the proposed
method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "31",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Hu:2014:SFV,
author = "Yongtao Hu and Jan Kautz and Yizhou Yu and Wenping
Wang",
title = "Speaker-Following Video Subtitles",
journal = j-TOMM,
volume = "11",
number = "2",
pages = "32:1--32:??",
month = dec,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632111",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Jan 7 17:48:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We propose a new method for improving the presentation
of subtitles in video (e.g., TV and movies). With
conventional subtitles, the viewer has to constantly
look away from the main viewing area to read the
subtitles at the bottom of the screen, which disrupts
the viewing experience and causes unnecessary
eyestrain. Our method places on-screen subtitles next
to the respective speakers to allow the viewer to
follow the visual content while simultaneously reading
the subtitles. We use novel identification algorithms
to detect the speakers based on audio and visual
information. Then the placement of the subtitles is
determined using global optimization. A comprehensive
usability study indicated that our subtitle placement
method outperformed both conventional fixed-position
subtitling and another previous dynamic subtitling
method in terms of enhancing the overall viewing
experience and reducing eyestrain.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "32",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2015:ISI,
author = "Kuan-Ta Chen and Songqing Chen and Wei Tsang Ooi",
title = "Introduction to the Special Issue on {MMSys 2014} and
{NOSSDAV 2014}",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "41:1--41:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2717509",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "41",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Schaber:2015:CAM,
author = "Philipp Schaber and Stephan Kopf and Sina Wetzel and
Tyler Ballast and Christoph Wesch and Wolfgang
Effelsberg",
title = "{CamMark}: Analyzing, Modeling, and Simulating
Artifacts in Camcorder Copies",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "42:1--42:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700295",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "To support the development of any system that includes
the generation and evaluation of camcorder copies, as
well as to provide a common benchmark for robustness
against camcorder copies, we present a tool to simulate
digital video re-acquisition using a digital video
camera. By resampling each video frame, we simulate the
typical artifacts occurring in a camcorder copy:
geometric modifications (aspect ratio changes,
cropping, perspective and lens distortion), temporal
sampling artifacts (due to different frame rates,
shutter speeds, rolling shutters, or playback), spatial
and color subsampling (rescaling, filtering, Bayer
color filter array), and processing steps (automatic
gain control, automatic white balance). We also support
the simulation of camera movement (e.g., a hand-held
camera) and background insertion. Furthermore, we allow
for an easy setup and calibration of all the simulated
artifacts, using sample/reference pairs of images and
videos. Specifically, temporal subsampling effects are
analyzed in detail to create realistic frame blending
artifacts in the simulated copies. We carefully
evaluated our entire camcorder simulation system and
found that the models we developed describe and match
the real artifacts quite well.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "42",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Toni:2015:OSA,
author = "Laura Toni and Ramon Aparicio-Pardo and Karine Pires
and Gwendal Simon and Alberto Blanc and Pascal
Frossard",
title = "Optimal Selection of Adaptive Streaming
Representations",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "43:1--43:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700294",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Adaptive streaming addresses the increasing and
heterogeneous demand of multimedia content over the
Internet by offering several encoded versions for each
video sequence. Each version (or representation) is
characterized by a resolution and a bit rate, and it is
aimed at a specific set of users, like TV or mobile
phone clients. While most existing works on adaptive
streaming deal with effective playout-buffer control
strategies on the client side, in this article we take
a providers' perspective and propose solutions to
improve user satisfaction by optimizing the set of
available representations. We formulate an integer
linear program that maximizes users' average
satisfaction, taking into account network dynamics,
type of video content, and user population
characteristics. The solution of the optimization is a
set of encoding parameters corresponding to the
representations set that maximizes user satisfaction.
We evaluate this solution by simulating multiple
adaptive streaming sessions characterized by realistic
network statistics, showing that the proposed solution
outperforms commonly used vendor recommendations, in
terms of user satisfaction but also in terms of
fairness and outage probability. The simulation results
show that video content information as well as network
constraints and users' statistics play a crucial role
in selecting proper encoding parameters to provide
fairness among users and to reduce network resource
usage. We finally propose a few theoretical guidelines
that can be used, in realistic settings, to choose the
encoding parameters based on the user characteristics,
the network capacity and the type of video content.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "43",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2015:ADF,
author = "Liang Chen and Yipeng Zhou and Dah Ming Chiu",
title = "Analysis and Detection of Fake Views in Online Video
Services",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "44:1--44:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700290",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Online video-on-demand (VoD) services invariably
maintain a view count for each video they serve, and it
has become an important currency for various
stakeholders, from viewers, to content owners,
advertizers, and the online service providers
themselves. There is often significant financial
incentive to use a robot (or a botnet) to artificially
create fake views. How can we detect fake views? Can we
detect them (and stop them) efficiently? What is the
extent of fake views with current VoD service
providers? These are the questions we study in this
article. We develop some algorithms and show that they
are quite effective for this problem.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "44",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Song:2015:SVT,
author = "Minseok Song and Yeongju Lee and Jinhan Park",
title = "Scheduling a Video Transcoding Server to Save Energy",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "45:1--45:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700282",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Recent popular streaming services such as TV
Everywhere, N-Screen, and dynamic adaptive streaming
over HTTP (DASH) need to deliver content to the wide
range of devices, requiring video content to be
transcoded into different versions. Transcoding tasks
require a lot of computation, and each task typically
has its own real-time constraint. These make it
difficult to manage transcoding, but the more efficient
use of energy in servers is an imperative. We
characterize transcoding workloads in terms of
deadlines and computation times, and propose a new
dynamic voltage and frequency scaling (DVFS) scheme
that allocates a frequency and a workload to each CPU
with the aim of minimizing power consumption while
meeting all transcoding deadlines. This scheme has been
simulated, and also implemented in a Linux transcoding
server, in which a frontend node distributes
transcoding requests to heterogeneous backend nodes.
This required a new protocol for communication between
nodes, a DVFS management scheme to reduce power
consumption and thread management and scheduling
schemes which ensure that transcoding deadlines are
met. Power measurements show that this approach can
reduce system-wide energy consumption by 17\% to 31\%,
compared with the Linux Ondemand governor.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "45",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Langroodi:2015:DCA,
author = "Mohsen Jamali Langroodi and Joseph Peters and Shervin
Shirmohammadi",
title = "Decoder-Complexity-Aware Encoding of Motion
Compensation for Multiple Heterogeneous Receivers",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "46:1--46:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700300",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "For mobile multimedia systems, advances in battery
technology have been much slower than those in memory,
graphics, and processing power, making power
consumption a major concern in mobile systems. The
computational complexity of video codecs, which
consists of CPU operations and memory accesses, is one
of the main factors affecting power consumption. In
this article, we propose a method that achieves
near-optimal video quality while respecting
user-defined bounds on the complexity needed to decode
a video. We specifically focus on the motion
compensation process, including motion vector
prediction and interpolation, because it is the single
largest component of computation-based power
consumption. We start by formulating a scenario with a
single receiver as a rate-distortion optimization
problem and we develop an efficient
decoder-complexity-aware video encoding method to solve
it. Then we extend our approach to handle multiple
heterogeneous receivers, each with a different
complexity requirement. We test our method
experimentally using the H.264 standard for the single
receiver scenario and the H.264 SVC extension for the
multiple receiver scenario. Our experimental results
show that our method can achieve up to 97\% of the
optimal solution value in the single receiver scenario,
and an average of 97\% of the optimal solution value in
the multiple receiver scenario. Furthermore, our tests
with actual power measurements show a power saving of
up to 23\% at the decoder when the complexity threshold
is halved in the encoder.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "46",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2015:TAT,
author = "Shannon Chen and Zhenhuan Gao and Klara Nahrstedt and
Indranil Gupta",
title = "{$3$DTI Amphitheater}: Towards {$3$DTI} Broadcasting",
journal = j-TOMM,
volume = "11",
number = "2s",
pages = "47:1--47:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700297",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Wed Feb 25 17:56:15 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "3DTI Amphitheater is a live broadcasting system for
dissemination of 3DTI (3D Tele-immersive) content. The
virtual environment constructed by the system mimics an
amphitheater in the real world, where performers
interact with each other in the central circular stage,
and the audience is placed in virtual seats that
surround the stage. Users of the Amphitheater can be
geographically dispersed and the streams created by the
performer sites are disseminated in a P2P network among
the participants. To deal with the high bandwidth
demand and strict latency bound of the service, we
identify the hierarchical priority of streams in
construction of the content dissemination forest.
Result shows that the Amphitheater outperforms prior
3DTI systems by boosting the application QoS by a
factor of 2.8 while sustaining the same hundred-scale
audience group.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "47",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2015:PMV,
author = "Ke Chen and Zhong Zhou and Wei Wu",
title = "Progressive Motion Vector Clustering for Motion
Estimation and Auxiliary Tracking",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "33:1--33:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700296",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The motion vector similarity between neighboring
blocks is widely used in motion estimation algorithms.
However, for nonneighboring blocks, they may also have
similar motions due to close depths or belonging to the
same object inside the scene. Therefore, the motion
vectors usually have several kinds of patterns, which
reveal a clustering structure. In this article, we
propose a progressive clustering algorithm, which
periodically counts the motion vectors of the past
blocks to make incremental clustering statistics. These
statistics are used as the motion vector predictors for
the following blocks. It is proved to be much more
efficient for one block to find the best-matching
candidate with the predictors. We also design the
clustering based search with CUDA for GPU acceleration.
Another interesting application of the clustering
statistics is persistent static object tracking. Based
on the statistics, several auxiliary tracking areas are
created to guide the object tracking. Even when the
target object has significant changes in appearance or
it disappears occasionally, its position still can be
predicted. The experiments on Xiph.org Video Test Media
dataset illustrate that our clustering based search
algorithm outperforms the mainstream and some
state-of-the-art motion estimation algorithms. It is 33
times faster on average than the full search algorithm
with only slightly higher mean-square error values in
the experiments. The tracking results show that the
auxiliary tracking areas help to locate the target
object effectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "33",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Shen:2015:HFM,
author = "Liquan Shen and Ping An and Zhaoyang Zhang and
Qianqian Hu and Zhengchuan Chen",
title = "A {$3$D--HEVC} Fast Mode Decision Algorithm for
Real-Time Applications",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "34:1--34:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700298",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "3D High Efficiency Video Coding (3D-HEVC) is an
extension of the HEVC standard for coding of multiview
videos and depth maps. It inherits the same quadtree
coding structure as HEVC for both components, which
allows recursively splitting into four equal-sized
coding units (CU). One of 11 different prediction modes
is chosen to code a CU in inter-frames. Similar to the
joint model of H.264/AVC, the mode decision process in
HM (reference software of HEVC) is performed using all
the possible depth levels and prediction modes to find
the one with the least rate distortion cost using a
Lagrange multiplier. Furthermore, both motion
estimation and disparity estimation need to be
performed in the encoding process of 3D-HEVC. Those
tools achieve high coding efficiency, but lead to a
significant computational complexity. In this article,
we propose a fast mode decision algorithm for 3D-HEVC.
Since multiview videos and their associated depth maps
represent the same scene, at the same time instant,
their prediction modes are closely linked. Furthermore,
the prediction information of a CU at the depth level $X$
is strongly related to that of its parent CU at the
depth level $ X - 1 $ in the quadtree coding structure of
HEVC since two corresponding CUs from two neighboring
depth levels share similar video characteristics. The
proposed algorithm jointly exploits the inter-view
coding mode correlation, the inter-component
(texture-depth) correlation and the inter-level
correlation in the quadtree structure of 3D-HEVC.
Experimental results show that our algorithm saves 66\%
encoder runtime on average with only a 0.2\% BD-Rate
increase on coded views and 1.3\% BD-Rate increase on
synthesized views.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "34",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2015:BML,
author = "Xiaoshan Yang and Tianzhu Zhang and Changsheng Xu and
Ming-Hsuan Yang",
title = "Boosted Multifeature Learning for Cross-Domain
Transfer",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "35:1--35:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700286",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Conventional learning algorithm assumes that the
training data and test data share a common
distribution. However, this assumption will greatly
hinder the practical application of the learned model
for cross-domain data analysis in multimedia. To deal
with this issue, transfer learning based technology
should be adopted. As a typical version of transfer
learning, domain adaption has been extensively studied
recently due to its theoretical value and practical
interest. In this article, we propose a boosted
multifeature learning (BMFL) approach to iteratively
learn multiple representations within a boosting
procedure for unsupervised domain adaption. The
proposed BMFL method has a number of properties. (1) It
reuses all instances with different weights assigned by
the previous boosting iteration and avoids discarding
labeled instances as in conventional methods. (2) It
models the instance weight distribution effectively by
considering the classification error and the domain
similarity, which facilitates learning new feature
representation to correct the previously misclassified
instances. (3) It learns multiple different feature
representations to effectively bridge the source and
target domains. We evaluate the BMFL by comparing its
performance on three applications: image
classification, sentiment classification and spam
filtering. Extensive experimental results demonstrate
that the proposed BMFL algorithm performs favorably
against state-of-the-art domain adaption methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "35",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2015:DVS,
author = "Pei-Yu Lin",
title = "Double Verification Secret Sharing Mechanism Based on
Adaptive Pixel Pair Matching",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "36:1--36:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700291",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Verifiability is essential for the secret sharing
approach, which allows the involved participants to
detect cheaters during the secret retrieval process. In
this article, we propose a double verification secret
sharing (DVSS) mechanism that can not only prevent
fraudulent participants but also satisfy the
requirements of secret payload, camouflage, image
fidelity and lossless revealed secret. DVSS offers
double verification process to enhance the cheater
detectability; experimental results reveal that the
designed scheme can share larger secret capacity and
retain superior image quality than the related secret
sharing methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "36",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Wang:2015:INB,
author = "Shuang Wang and Shuqiang Jiang",
title = "{INSTRE}: a New Benchmark for Instance-Level Object
Retrieval and Recognition",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "37:1--37:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700292",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Over the last several decades, researches on visual
object retrieval and recognition have achieved fast and
remarkable success. However, while the category-level
tasks prevail in the community, the instance-level
tasks (especially recognition) have not yet received
adequate focuses. Applications such as content-based
search engine and robot vision systems have alerted the
awareness to bring instance-level tasks into a more
realistic and challenging scenario. Motivated by the
limited scope of existing instance-level datasets, in
this article we propose a new benchmark for
INSTance-level visual object REtrieval and REcognition
(INSTRE). Compared with existing datasets, INSTRE has
the following major properties: (1) balanced data
scale, (2) more diverse intraclass instance variations,
(3) cluttered and less contextual backgrounds, (4)
object localization annotation for each image, (5)
well-manipulated double-labelled images for measuring
multiple object (within one image) case. We will
quantify and visualize the merits of INSTRE data, and
extensively compare them against existing datasets.
Then on INSTRE, we comprehensively evaluate several
popular algorithms to large-scale object retrieval
problem with multiple evaluation metrics. Experimental
results show that all the methods suffer a performance
drop on INSTRE, proving that this field still remains a
challenging problem. Finally we integrate these
algorithms into a simple yet efficient scheme for
recognition and compare it with classification-based
methods. Importantly, we introduce the realistic
multiobjects recognition problem. All experiments are
conducted in both single object case and multiple
objects case.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "37",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lathey:2015:IEE,
author = "Ankita Lathey and Pradeep K. Atrey",
title = "Image Enhancement in Encrypted Domain over Cloud",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "38:1--38:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656205",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Cloud-based multimedia systems are becoming
increasingly common. These systems offer not only
storage facility, but also high-end computing
infrastructure which can be used to process data for
various analysis tasks ranging from low-level data
quality enhancement to high-level activity and behavior
identification operations. However, cloud data centers,
being third party servers, are often prone to
information leakage, raising security and privacy
concerns. In this article, we present a Shamir's secret
sharing based method to enhance the quality of
encrypted image data over cloud. Using the proposed
method we show that several image enhancement
operations such as noise removal, antialiasing, edge
and contrast enhancement, and dehazing can be performed
in encrypted domain with near-zero loss in accuracy and
minimal computation and data overhead. Moreover, the
proposed method is proven to be information
theoretically secure.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "38",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yin:2015:CVC,
author = "Yifang Yin and Beomjoo Seo and Roger Zimmermann",
title = "Content vs.\ Context: Visual and Geographic Information
Use in Video Landmark Retrieval",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "39:1--39:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700287",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Due to the ubiquity of sensor-equipped smartphones, it
has become increasingly feasible for users to capture
videos together with associated geographic metadata,
for example the location and the orientation of the
camera. Such contextual information creates new
opportunities for the organization and retrieval of
geo-referenced videos. In this study we explore the
task of landmark retrieval through the analysis of two
types of state-of-the-art techniques, namely
media-content-based and geocontext-based retrievals.
For the content-based method, we choose the Spatial
Pyramid Matching (SPM) approach combined with two
advanced coding methods: Sparse Coding (SC) and
Locality-Constrained Linear Coding (LLC). For the
geo-based method, we present the Geo Landmark
Visibility Determination (GeoLVD) approach which
computes the visibility of a landmark based on
intersections of a camera's field-of-view (FOV) and the
landmark's geometric information available from
Geographic Information Systems (GIS) and services. We
first compare the retrieval results of the two methods,
and discuss the strengths and weaknesses of each
approach in terms of precision, recall and execution
time. Next we analyze the factors that affect the
effectiveness for the content-based and the geo-based
methods, respectively. Finally we propose a hybrid
retrieval method based on the integration of the visual
(content) and geographic (context) information, which
is shown to achieve significant improvements in our
experiments. We believe that the results and
observations in this work will enlighten the design of
future geo-referenced video retrieval systems, improve
our understanding of selecting the most appropriate
visual features for indexing and searching, and help in
selecting between the most suitable methods for
retrieval based on different conditions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "39",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Yang:2015:RCI,
author = "Hong-Ying Yang and Xiang-Yang Wang and Pan-Pan Niu and
Ai-Long Wang",
title = "Robust Color Image Watermarking Using Geometric
Invariant Quaternion Polar Harmonic Transform",
journal = j-TOMM,
volume = "11",
number = "3",
pages = "40:1--40:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700299",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Thu Feb 5 17:03:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "It is a challenging work to design a robust color
image watermarking scheme against geometric
distortions. Moments and moment invariants have become
a powerful tool in robust image watermarking owing to
their image description capability and geometric
invariance property. However, the existing moment-based
watermarking schemes were mainly designed for gray
images but not for color images, and detection quality
and robustness will be lowered when watermark is
directly embedded into the luminance component or three
color channels of color images. Furthermore, the
imperceptibility of the embedded watermark is not well
guaranteed. Based on algebra of quaternions and polar
harmonic transform (PHT), we introduced the quaternion
polar harmonic transform (QPHT) for invariant color
image watermarking in this article, which can be seen
as the generalization of PHT for gray-level images. It
is shown that the QPHT can be obtained from the PHT of
each color channel. We derived and analyzed the
rotation, scaling, and translation (RST) invariant
property of QPHT. We also discussed the problem of
color image watermarking using QPHT. Experimental
results are provided to illustrate the efficiency of
the proposed color image watermarking against geometric
distortions and common image processing operations
(including color attacks).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "40",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Krishnappa:2015:CCV,
author = "Dilip Kumar Krishnappa and Michael Zink and Carsten
Griwodz and P{\aa}l Halvorsen",
title = "Cache-Centric Video Recommendation: an Approach to
Improve the Efficiency of {YouTube} Caches",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "48:1--48:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2716310",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we take advantage of the user
behavior of requesting videos from the top of the
related list provided by YouTube to improve the
performance of YouTube caches. We recommend that local
caches reorder the related lists associated with
YouTube videos, presenting the cached content above
noncached content. We argue that the likelihood that
viewers select content from the top of the related list
is higher than selection from the bottom, and pushing
contents already in the cache to the top of the related
list would increase the likelihood of choosing cached
content. To verify that the position on the list really
is the selection criterion more dominant than the
content itself, we conduct a user study with 40
YouTube-using volunteers who were presented with random
related lists in their everyday YouTube use. After
confirming our assumption, we analyze the benefits of
our approach by an investigation that is based on two
traces collected from a university campus. Our analysis
shows that the proposed reordering approach for related
lists would lead to a 2 to 5 times increase in cache
hit rate compared to an approach without reordering the
related list. This increase in hit rate would lead to
reduction in server load and backend bandwidth usage,
which in turn reduces the latency in streaming the
video requested by the viewer and has the potential to
improve the overall performance of YouTube's content
distribution system. An analysis of YouTube's
recommendation system reveals that related lists are
created from a small pool of videos, which increases
the potential for caching content from related lists
and reordering based on the content in the cache.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "48",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Zhang:2015:PMC,
author = "Yu Zhang and James Z. Wang and Jia Li",
title = "Parallel Massive Clustering of Discrete
Distributions",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "49:1--49:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700293",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The trend of analyzing big data in artificial
intelligence demands highly-scalable machine learning
algorithms, among which clustering is a fundamental and
arguably the most widely applied method. To extend the
applications of regular vector-based clustering
algorithms, the Discrete Distribution (D2) clustering
algorithm has been developed, aiming at clustering data
represented by bags of weighted vectors which are well
adopted data signatures in many emerging information
retrieval and multimedia learning applications.
However, the high computational complexity of
D2-clustering limits its impact in solving massive
learning problems. Here we present the parallel
D2-clustering (PD2-clustering) algorithm with
substantially improved scalability. We developed a
hierarchical multipass algorithm structure for parallel
computing in order to achieve a balance between the
individual-node computation and the integration process
of the algorithm. Experiments and extensive comparisons
between PD2-clustering and other clustering algorithms
are conducted on synthetic datasets. The results show
that the proposed parallel algorithm achieves
significant speed-up with minor accuracy loss. We apply
PD2-clustering to image concept learning. In addition,
by extending D2-clustering to symbolic data, we apply
PD2-clustering to protein sequence clustering. For both
applications, we demonstrate the high competitiveness
of our new algorithm in comparison with other
state-of-the-art methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "49",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Baik:2015:EMR,
author = "Eilwoo Baik and Amit Pande and Prasant Mohapatra",
title = "Efficient {MAC} for Real-Time Video Streaming over
Wireless {LAN}",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "50:1--50:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744412",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Wireless communication systems are highly prone to
channel errors. With video being a major player in
Internet traffic and undergoing exponential growth in
wireless domain, we argue for the need of a Video-aware
MAC (VMAC) to significantly improve the throughput and
delay performance of real-time video streaming service.
VMAC makes two changes to optimize wireless LAN for
video traffic: (a) It incorporates a
Perceptual-Error-Tolerance (PET) to the MAC frames by
reducing MAC retransmissions while minimizing any
impact on perceptual video quality; and (b) It uses a
group NACK-based Adaptive Window (NAW) of MAC frames to
improve both throughput and delay performance in
varying channel conditions. Through simulations and
experiments, we observe 56--89\% improvement in
throughput and 34--48\% improvement in delay
performance over legacy DCF and 802.11e schemes. VMAC
also shows 15--78\% improvement over legacy schemes
with multiple clients.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "50",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Antaris:2015:SSC,
author = "Stefanos Antaris and Dimitrios Rafailidis",
title = "Similarity Search over the Cloud Based on Image
Descriptors' Dimensions Value Cardinalities",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "51:1--51:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2716315",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In recognition that in modern applications billions of
images are stored into distributed databases in
different logical or physical locations, we propose a
similarity search strategy over the cloud based on the
dimensions value cardinalities of image descriptors.
Our strategy has low preprocessing requirements by
dividing the computational cost of the preprocessing
steps into several nodes over the cloud and locating
the descriptors with similar dimensions value
cardinalities logically close. New images are inserted
into the distributed databases over the cloud
efficiently, by supporting dynamical update in
real-time. The proposed insertion algorithm has low
computational complexity, depending exclusively on the
dimensionality of descriptors and a small subset of
descriptors with similar dimensions value
cardinalities. Finally, an efficient query processing
algorithm is proposed, where the dimensions of image
descriptors are prioritized in the searching strategy,
assuming that dimensions of high value cardinalities
have more discriminative power than the dimensions of
low ones. The computation effort of the query
processing algorithm is divided into several nodes over
the cloud infrastructure. In our experiments with seven
publicly available datasets of image descriptors, we
show that the proposed similarity search strategy
outperforms competitive methods of single node,
parallel and cloud-based architectures, in terms of
preprocessing cost, search time and accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "51",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Lin:2015:AMD,
author = "Yin-Tzu Lin and I-Ting Liu and Jyh-Shing Roger Jang
and Ja-Ling Wu",
title = "Audio Musical Dice Game: a User-Preference-Aware
Medley Generating System",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "52:1--52:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2710015",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article proposes a framework for creating
user-preference-aware music medleys from users' music
collections. We treat the medley generation process as
an audio version of a musical dice game. Once the
user's collection has been analyzed, the system is able
to generate various pleasing medleys. This flexibility
allows users to create medleys according to the
specified conditions, such as the medley structure or
the must-use clips. Even users without musical
knowledge can compose medley songs from their favorite
tracks. The effectiveness of the system has been
evaluated through both objective and subjective
experiments on individual components in the system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "52",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Chen:2015:AVR,
author = "Bo-Hao Chen and Shih-Chia Huang",
title = "An Advanced Visibility Restoration Algorithm for
Single Hazy Images",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "53:1--53:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2726947",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Haze removal is the process by which horizontal
obscuration is eliminated from hazy images captured
during inclement weather. Images captured in natural
environments with varied weather conditions frequently
exhibit localized light sources or color-shift effects.
The occurrence of these effects presents a difficult
challenge for hazy image restoration, with which many
traditional restoration methods cannot adequately
contend. In this article, we present a new image haze
removal approach based on Fisher's linear
discriminant-based dual dark channel prior scheme in
order to solve the problems associated with the
presence of localized light sources and color shifts,
and thereby achieve effective restoration. Experimental
restoration results via qualitative and quantitative
evaluations show that our proposed approach can provide
higher haze-removal efficacy for images captured in
varied weather conditions than can the other
state-of-the-art approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "53",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Bao:2015:CPE,
author = "Bing-Kun Bao and Changsheng Xu and Weiqing Min and
Mohammod Shamim Hossain",
title = "Cross-Platform Emerging Topic Detection and
Elaboration from Multimedia Streams",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "54:1--54:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2730889",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "With the explosive growth of online media platforms in
recent years, it becomes more and more attractive to
provide users a solution of emerging topic detection
and elaboration. And this posts a real challenge to
both industrial and academic researchers because of the
overwhelming information available in multiple
modalities and with large outlier noises. This article
provides a method on emerging topic detection and
elaboration using multimedia streams cross different
online platforms. Specifically, Twitter, New York Times
and Flickr are selected for the work to represent the
microblog, news portal and imaging sharing platforms.
The emerging keywords of Twitter are firstly extracted
using aging theory. Then, to overcome the nature of
short length message in microblog, Robust
Cross-Platform Multimedia Co-Clustering (RCPMM-CC) is
proposed to detect emerging topics with three
novelties: (1) The data from different media platforms
are in multimodalities; (2) The coclustering is
processed based on a pairwise correlated structure, in
which the involved three media platforms are pairwise
dependent; (3) The noninformative samples are
automatically pruned away at the same time of
coclustering. In the last step of cross-platform
elaboration, we enrich each emerging topic with the
samples from New York Times and Flickr by computing the
implicit links between social topics and samples from
selected news and Flickr image clusters, which are
obtained by RCPMM-CC. Qualitative and quantitative
evaluation results demonstrate the effectiveness of our
method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "54",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2015:QQG,
author = "Yang Li and Azzedine Boukerche",
title = "{QuGu}: a Quality Guaranteed Video Dissemination
Protocol Over Urban Vehicular Ad Hoc Networks",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "55:1--55:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2725469",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "Video dissemination over Vehicular Ad Hoc Networks is
an attractive technology that supports many novel
applications. The merit of this work lies in the design
of an efficient video dissemination protocol that
provides high video quality at different data rates for
urban scenarios. Our objective is to improve received
video quality while meeting delay and packet loss. In
this work, we first employ a reliable scheme known as
connected dominating set, which is an efficient
receiver-based routing scheme for broadcasting video
content. To avoid repeated computing of the connected
dominating set, we add three statuses to each node. In
nonscalable video coding, the distribution of lost
frames can cause a major impact on video quality at the
receiver's end. Therefore, for the second step, we
employ Interleaving to spread out the burst losses and
to reduce the influence of loss distributions. Although
Interleaving can reduce the influence of cluster frame
loss, single packet loss is also a concern due to
collisions, and to intermittent disconnection in the
topology. In order to fix these single packet losses,
we propose a store-carry-forward scheme for the nodes
in order to retransmit the local buffer stored packets.
The results, when compared to the selected base
protocols, show that our proposed protocol is an
efficient solution for video dissemination over urban
Vehicular Ad Hoc Networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "55",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Gaddam:2015:COM,
author = "Vamsidhar Reddy Gaddam and Ragnhild Eg and Ragnar
Langseth and Carsten Griwodz and P{\aa}l Halvorsen",
title = "The Cameraman Operating My Virtual Camera is
Artificial: Can the Machine Be as Good as a Human?",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "56:1--56:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744411",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "In this article, we argue that the energy spent in
designing autonomous camera control systems is not
spent in vain. We present a real-time virtual camera
system that can create smooth camera motion. Similar
systems are frequently benchmarked with the human
operator as the best possible reference; however, we
avoid a priori assumptions in our evaluations. Our main
question is simply whether we can design algorithms to
steer a virtual camera that can compete with the user
experience for recordings from an expert operator with
several years of experience? In this respect, we
present two low-complexity servoing methods that are
explored in two user studies. The results from the user
studies give a promising answer to the question
pursued. Furthermore, all components of the system meet
the real-time requirements on commodity hardware. The
growing capabilities of both hardware and network in
mobile devices give us hope that this system can be
deployed to mobile users in the near future. Moreover,
the design of the presented system takes into account
that services to concurrent users must be supported.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "56",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Natarajan:2015:MCC,
author = "Prabhu Natarajan and Pradeep K. Atrey and Mohan
Kankanhalli",
title = "Multi-Camera Coordination and Control in Surveillance
Systems: a Survey",
journal = j-TOMM,
volume = "11",
number = "4",
pages = "57:1--57:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2710128",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 7 08:29:56 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "The use of multiple heterogeneous cameras is becoming
more common in today's surveillance systems. In order
to perform surveillance tasks, effective coordination
and control in multi-camera systems is very important,
and is catching significant research attention these
days. This survey aims to provide researchers with a
state-of-the-art overview of various techniques for
multi-camera coordination and control (MC$^3$) that
have been adopted in surveillance systems. The existing
literature on MC$^3$ is presented through several
classifications based on the applicable architectures,
frameworks and the associated surveillance tasks.
Finally, a discussion on the open problems in
surveillance area that can be solved effectively using
MC$^3$ and the future directions in MC$^3$ research is
presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "57",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{You:2015:UPD,
author = "Shingchern D. You and Yi-Han Pu",
title = "Using Paired Distances of Signal Peaks in Stereo
Channels as Fingerprints for Copy Identification",
journal = j-TOMM,
volume = "12",
number = "1",
pages = "1:1--1:??",
month = aug,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2742059",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 28 06:14:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "This article proposes to use the relative distances
between adjacent envelope peaks detected in stereo
audio as fingerprints for copy identification. The
matching algorithm used is the rough longest common
subsequence (RLCS) algorithm. The experimental results
show that the proposed approach has better
identification accuracy than an MPEG-7 based scheme for
distorted and noisy audio. When compared with other
schemes, the proposed scheme uses fewer bits with
comparable performance. The proposed fingerprints can
also be used in conjunction with the MPEG-7 based
scheme for lower computational burden.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "1",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{ElEssaili:2015:QBC,
author = "Ali {El Essaili} and Zibin Wang and Eckehard Steinbach
and Liang Zhou",
title = "{QoE}-Based Cross-Layer Optimization for Uplink Video
Transmission",
journal = j-TOMM,
volume = "12",
number = "1",
pages = "2:1--2:??",
month = aug,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2801124",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 28 06:14:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www.math.utah.edu/pub/tex/bib/tomccap.bib",
abstract = "We study the problem of resource-efficient uplink
distribution of user-generated video content over
fourth-generation mobile networks. This is challenged
by (1) the capacity-limited and time-variant uplink
channel, (2) the resource-hungry upstreamed videos and
their dynamically changing complexity, and (3) the
different playout times of the video consumers. To
address these issues, we propose a systematic approach
for quality-of-experience (QoE)-based resource
optimization and uplink transmission of multiuser
generated video content. More specifically, we present
an analytical model for distributed scalable video
transmission at the mobile producers which considers
these constraints. This is complemented by a multiuser
cross-layer optimizer in the mobile network which
determines the transmission capacity for each mobile
terminal under current cell load and radio conditions.
Both optimal and low-complexity solutions are
presented. Simulation results for LTE uplink
transmission show that significant gains in perceived
video quality can be achieved by our cross-layer
resource optimization scheme. In addition, the
distributed optimization at the mobile producers can
further improve the user experience across the
different types of video consumers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Multimed Comput. Commun. Appl.",
articleno = "2",
fjournal = "ACM Transactions on Multimedia Computing,
Communications, and Applications",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Li:2015:CSN,
author = "Li-Jia Li and David A. Shamma and Xiangnan Kong and
Sina Jafarpour and Roelof {Van Zwol} and Xuanhui Wang",
title = "{CelebrityNet}: a Social Network Constructed from
Large-Scale Online Celebrity Images",
journal = j-TOMM,
volume = "12",
number = "1",
pages = "3:1--3:??",
month = aug,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2801125",
ISSN = "1551-6857 (print), 1551-6865 (electronic)",
ISSN-L = "1551-6857",
bibdate = "Fri Aug 28 06:14:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/;
https://www