%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.33",
%%%     date            = "28 June 2018",
%%%     time            = "15:58:23 MDT",
%%%     filename        = "tweb.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "29760 11550 66304 614700",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on the Web (TWEB);
%%%                        bibliography; TWEB",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on the Web (TWEB) (CODEN
%%%                        ????, ISSN 1559-1131), covering all journal
%%%                        issues from 2007 -- date.
%%%
%%%                        At version 1.33, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2007 (  14)    2011 (  21)    2015 (  20)
%%%                             2008 (  22)    2012 (  18)    2016 (  24)
%%%                             2009 (  14)    2013 (  30)    2017 (  25)
%%%                             2010 (  17)    2014 (  19)    2018 (  15)
%%%
%%%                             Article:        239
%%%
%%%                             Total entries:  239
%%%
%%%                        The journal Web page can be found at:
%%%
%%%                            http://www.acm.org/pubs/tweb.html
%%%
%%%
%%%                            http://www.acm.org/tweb/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J1062
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume''.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================

%% Preamble: loads the BibNet author-name macro file and defines \TM,
%% a superscript trademark-sign macro used in titles/abstracts.
@Preamble{"\input bibnames.sty" #
"\def \TM {${}^{\sc TM}$}"
}


%%% ====================================================================
%%% Acknowledgement abbreviations:

%% Acknowledgement string naming the bibliography's maintainer;
%% referenced by every entry's acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
University of Utah,
Department of Mathematics, 110 LCB,
155 S 1400 E RM 233,
Salt Lake City, UT 84112-0090, USA,
Tel: +1 801 581 5254,
FAX: +1 801 581 4148,
e-mail: \path|beebe@math.utah.edu|,
\path|beebe@acm.org|,
\path|beebe@computer.org| (Internet),
URL: \path|http://www.math.utah.edu/~beebe/|"}


%%% ====================================================================
%%% Journal abbreviations:

@String{j-TWEB                  = "ACM Transactions on the Web (TWEB)"}


%%% ====================================================================
%%% Bibliography entries:

%% Editorial introduction to the inaugural issue (TWEB v1 n1, May 2007).
@Article{Ashman:2007:I,
author =       "Helen Ashman and Arun Iyengar",
title =        "Introduction",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "1:1--1:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232723",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

%% TWEB v1 n1 article 2.
%% NOTE(review): abstract text appears truncated mid-sentence ("employ a
%% multitier / of analytically modeling") --- verify against publisher record.
@Article{Urgaonkar:2007:AMM,
author =       "Bhuvan Urgaonkar and Giovanni Pacifici and Prashant
Shenoy and Mike Spreitzer and Asser Tantawi",
title =        "Analytic modeling of multitier {Internet}
applications",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "2:1--2:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232724",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Since many Internet applications employ a multitier
of analytically modeling the behavior of such
applications. We present a model based on a network of
queues where the queues represent different tiers of
the application. Our model is sufficiently general to
capture (i) the behavior of tiers with significantly
different performance characteristics and (ii)
application idiosyncrasies such as session-based
replicas, and caching at intermediate tiers. We
validate our model using real multitier applications
running on a Linux server cluster. Our experiments
indicate that our model faithfully captures the
performance of these applications for a number of
workloads and configurations. Furthermore, our model
successfully handles a comprehensive range of resource
utilization---from 0 to near saturation for the
CPU---for two separate tiers. For a variety of
scenarios, including those with caching at one of the
application tiers, the average response times predicted
by our model were within the 95\% confidence intervals
of the observed average response times. Our experiments
also demonstrate the utility of the model for dynamic
capacity provisioning, performance prediction,
bottleneck identification, and session policing. In one
scenario, where the request arrival rate increased from
less than 1500 to nearly 4200 requests/minute, a
dynamic provisioning technique employing our model was
able to maintain response time targets by increasing
the capacity of two of the tiers by factors of 2 and
3.5, respectively.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "analytical model; dynamic provisioning; hosting
platform; Internet service; mean-value analysis;
performance prediction; policing; queuing theory;
session; tier",
}

%% TWEB v1 n1 article 3.
%% Repairs: the title string was left unclosed (extraction damage; rest of
%% title restored from the published record), and the keywords field had
%% lost its field name, leaving stray text after journal-URL.
%% NOTE(review): leading keyword(s) may still be missing --- verify against
%% the publisher record. Abstract also appears internally truncated.
@Article{Jansen:2007:CES,
author =       "Bernard J. Jansen",
title =        "The comparative effectiveness of sponsored and
nonsponsored links for {Web} e-commerce queries",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "3:1--3:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232725",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The predominant business model for Web search engines
is sponsored search, which generates billions in yearly
consumers with relevant choices for products and
services? We address this and related issues by
investigating the relevance of sponsored and
search engines. The results show that average relevance
practically the same, although the relevance ratings
108 ecommerce queries and 8,256 retrieved links for
these queries from three major Web search engines:
measures, we qualitatively analyzed the e-commerce
queries, deriving five categorizations of underlying
information needs. Product-specific queries are the
most prevalent (48\%). Title (62\%) and summary (33\%)
with URL a distant third (2\%). To gauge the
effectiveness of sponsored search campaigns, we
It appears that links from organizations with large
sponsored search campaigns are more relevant than the
Web search engines and sponsored search as a long-term
business model and as a mechanism for finding relevant
information for searchers.",
acknowledgement = ack-nhfb,
articleno =    "3",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "results; sponsored search; Web search engines; Web
searching",
}

%% TWEB v1 n1 article 4.
%% Repair: the author string was left unclosed and the final author's
%% surname was lost (extraction damage); restored as Keith Bradley per
%% the published record.
@Article{Church:2007:MIA,
author =       "Karen Church and Barry Smyth and Paul Cotter and Keith
Bradley",
title =        "Mobile information access: a study of emerging
search behavior on the mobile {Internet}",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "4:1--4:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232726",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "It is likely that mobile phones will soon come to
rival more traditional devices as the primary platform
for information access. Consequently, it is important
to understand the emerging information access behavior
of mobile Internet (MI) users especially in relation to
their use of mobile handsets for information browsing
the results of a recent analysis of the MI habits of
more than 600,000 European MI users, with a particular
emphasis on the emerging interest in mobile search. We
consider a range of factors including whether there are
key differences between browsing and search behavior on
the MI compared to the Web. We highlight how browsing
continues to dominate mobile information access, but go
on to show how search is becoming an increasingly
popular information access alternative especially in
relation to certain types of mobile handsets and
information needs. Moreover, we show that sessions
involving search tend to be longer and more data-rich
than those that do not involve search. We also look at
the type of queries used during mobile search and the
way that these queries tend to be modified during the
course of a mobile search session. Finally we examine
the overlap among mobile search queries and the
different topics mobile users are interested in.",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "log analysis; Mobile browsing; mobile Internet; mobile
search",
}

%% TWEB v1 n1 article 5.
%% Repairs: the author field was lost except for its final continuation
%% line ("Huberman\","); restored from the published record. Also restored
%% the missing opening backquote in ``long tail'' (TeX quoting).
@Article{Leskovec:2007:DVM,
author =       "Jure Leskovec and Lada A. Adamic and Bernardo A.
Huberman",
title =        "The dynamics of viral marketing",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "5:1--5:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232727",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We present an analysis of a person-to-person
recommendation network, consisting of 4 million people
who made 16 million recommendations on half a million
products. We observe the propagation of recommendations
and the cascade sizes, which we explain by a simple
stochastic model. We analyze how user behavior varies
within user communities defined by a recommendation
network. Product purchases follow a `long tail' where a
significant share of purchases belongs to rarely sold
items. We establish how the recommendation network
grows over time and how effective it is from the
viewpoint of the sender and receiver of the
recommendations. While on average recommendations are
not very effective at inducing purchases and do not
spread very far, we present a model that successfully
identifies communities, product, and pricing categories
for which viral marketing seems to be very effective.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "e-commerce; long tail; network analysis; recommender
systems; Viral marketing; word-of-mouth",
}

%% TWEB v1 n1 article 6.
%% NOTE(review): abstract appears truncated mid-sentence ("service
%% processes of different / and their performances") --- verify against
%% publisher record.
@Article{Yu:2007:EAW,
author =       "Tao Yu and Yue Zhang and Kwei-Jay Lin",
title =        "Efficient algorithms for {Web} services selection with
end-to-end {QoS} constraints",
journal =      j-TWEB,
volume =       "1",
number =       "1",
pages =        "6:1--6:??",
month =        may,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1232722.1232728",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Service-Oriented Architecture (SOA) provides a
flexible framework for service composition. Using
standard-based protocols (such as SOAP and WSDL),
composite services can be constructed by integrating
atomic services developed independently. Algorithms are
needed to select service components with various QoS
levels according to some application-dependent
performance requirements. We design a broker-based
architecture to facilitate the selection of QoS-based
services. The objective of service selection is to
maximize an application-specific utility function under
the end-to-end QoS constraints. The problem is modeled
in two ways: the combinatorial model and the graph
model. The combinatorial model defines the problem as a
multidimension multichoice 0-1 knapsack problem (MMKP).
The graph model defines the problem as a
multiconstraint optimal path (MCOP) problem. Efficient
heuristic algorithms for service processes of different
and their performances are studied by simulations. We
also compare the pros and cons between the two
models.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "End-to-end QoS; service composition; service oriented
architecture (SOA); service selection; Web services",
}

%% TWEB v1 n2 article 7 (August 2007 issue begins here).
@Article{Dubinko:2007:VTT,
author =       "Micah Dubinko and Ravi Kumar and Joseph Magnani and
Jasmine Novak and Prabhakar Raghavan and Andrew
Tomkins",
title =        "Visualizing tags over time",
journal =      j-TWEB,
volume =       "1",
number =       "2",
pages =        "7:1--7:??",
month =        aug,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1255438.1255439",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We consider the problem of visualizing the evolution
of tags within the Flickr (flickr.com) online image
sharing community. Any user of the Flickr service may
append a tag to any photo in the system. Over the past
year, users have on average added over a million tags
each week. Understanding the evolution of these tags
over time is therefore a challenging task. We present a
new approach based on a characterization of the most
interesting tags associated with a sliding interval of
time. An animation provided via Flash in a Web browser
allows the user to observe and interact with the
interesting tags as they evolve over time.\par

New algorithms and data structures are required to
support the efficient generation of this visualization.
We combine a novel solution to an interval covering
problem with extensions to previous work on score
aggregation in order to create an efficient backend
system capable of producing visualizations at arbitrary
scales on this large dataset in real time.",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Flickr; interval covering; tags; temporal evolution;
visualization",
}

%% TWEB v1 n2 article 8.
@Article{Mohan:2007:SPC,
author =       "Bharath Kumar Mohan and Benjamin J. Keller and Naren
Ramakrishnan",
title =        "Scouts, promoters, and connectors: {The} roles of
ratings in nearest-neighbor collaborative filtering",
journal =      j-TWEB,
volume =       "1",
number =       "2",
pages =        "8:1--8:??",
month =        aug,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1255438.1255440",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Recommender systems aggregate individual user ratings
into predictions of products or services that might
interest visitors. The quality of this aggregation
process crucially affects the user experience and hence
the effectiveness of recommenders in e-commerce. We
present a characterization of nearest-neighbor
collaborative filtering that allows us to disaggregate
global recommender performance measures into
contributions made by each individual rating. In
particular, we formulate three roles--- {\em scouts},
{\em promoters}, and {\em connectors\/} ---that capture
how users receive recommendations, how items get
recommended, and how ratings of these two types are
themselves connected, respectively. These roles find
direct uses in improving recommendations for users, in
better targeting of items and, most importantly, in
helping monitor the health of the system as a whole.
For instance, they can be used to track the evolution
of neighborhoods, to identify rating subspaces that do
not contribute (or contribute negatively) to system
performance, to enumerate users who are in danger of
leaving, and to assess the susceptibility of the system
to attacks such as shilling. We argue that the three
rating roles presented here provide broad primitives to
manage a recommender system and its community.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "collaborative filtering; connectors; neighborhoods;
promoters; Recommender systems; scouts; user-based and
item-based algorithms",
}

%% TWEB v1 n2 article 9.
%% Repair: restored the missing opening double backquote in
%% ``snipe before the snipers'' (TeX quoting; the closing '' was present).
@Article{Rogers:2007:EPB,
author =       "Alex Rogers and Esther David and Nicholas R. Jennings
and Jeremy Schiff",
title =        "The effects of proxy bidding and minimum bid
increments within {eBay} auctions",
journal =      j-TWEB,
volume =       "1",
number =       "2",
pages =        "9:1--9:??",
month =        aug,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1255438.1255441",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We present a mathematical model of the eBay auction
protocol and perform a detailed analysis of the effects
that the eBay proxy bidding system and the minimum bid
increment have on the auction properties. We first
consider the revenue of the auction, and we show
analytically that when two bidders with independent
private valuations use the eBay proxy bidding system
there exists an optimal value for the minimum bid
increment at which the auctioneer's revenue is
maximized. We then consider the sequential way in which
bids are placed within the auction, and we show
analytically that independent of assumptions regarding
the bidders' valuation distribution or bidding strategy
the number of visible bids placed is related to the
logarithm of the number of potential bidders. Thus, in
many cases, it is only a minority of the potential
bidders that are able to submit bids and are visible in
the auction bid history (despite the fact that the
other hidden bidders are still effectively competing
for the item). Furthermore, we show through simulation
that the minimum bid increment also introduces an
inefficiency to the auction, whereby a bidder who
enters the auction late may find that its valuation is
insufficient to allow them to advance the current bid
by the minimum bid increment despite them actually
having the highest valuation for the item. Finally, we
use these results to consider appropriate strategies
for bidders within real world eBay auctions. We show
that while last-minute bidding (sniping) is an
effective strategy against bidders engaging in
incremental bidding (and against those with common
values), in general, delaying bidding is
disadvantageous even if delayed bids are sure to be
received before the auction closes. Thus, when several
bidders submit last-minute bids, we show that rather
than seeking to bid as late as possible, a bidder
should try to be the first sniper to bid (i.e., it
should ``snipe before the snipers'').",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "bid increment; electronic commerce; Online auctions;
proxy bidding; sniping",
}

%% TWEB v1 n2 article 10.
%% NOTE(review): abstract appears truncated mid-sentence ("rapidly
%% evolving / statistical analysis") --- verify against publisher record.
@Article{Serrano:2007:DSW,
author =       "M. {\'A}ngeles Serrano and Ana Maguitman and
Mari{\'a}n Bogu{\~n}{\'a} and Santo Fortunato and
Alessandro Vespignani",
title =        "Decoding the structure of the {WWW}: a comparative
analysis of {Web} crawls",
journal =      j-TWEB,
volume =       "1",
number =       "2",
pages =        "10:1--10:??",
month =        aug,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1255438.1255442",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The understanding of the immense and intricate
topological structure of the World Wide Web (WWW) is a
major scientific and technological challenge. This has
been recently tackled by characterizing the properties
of its representative graphs, in which vertices and
directed edges are identified with Web pages and
hyperlinks, respectively. Data gathered in large-scale
crawls have been analyzed by several groups resulting
in a general picture of the WWW that encompasses many
of the complex properties typical of rapidly evolving
statistical analysis of the topological properties of
four different WWW graphs obtained with different
crawlers. We find that, despite the very large size of
the samples, the statistical measures characterizing
these graphs differ quantitatively, and in some cases
qualitatively, depending on the domain analyzed and the
crawl used for gathering the data. This spurs the issue
of the presence of sampling biases and structural
differences of Web crawls that might induce properties
not representative of the actual global underlying
graph. In short, the stability of the widely accepted
statistical description of the Web is called into
question. In order to provide a more accurate
characterization of the Web graph, we study statistical
measures beyond the degree distribution, such as
degree-degree correlation functions or the statistics
of reciprocal connections. The latter appears to
enclose the relevant correlations of the WWW graph and
carry most of the topological information of the Web.
The analysis of this quantity is also of major interest
in relation to the navigability and searchability of
the Web.",
acknowledgement = ack-nhfb,
articleno =    "10",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "crawler biases; statistical analysis; Web graph
structure; Web measurement",
}

%% TWEB v1 n3 article 11 (September 2007 issue begins here).
%% NOTE(review): abstract appears truncated mid-sentence ("as exemplified
%% in / Shield's vision") --- verify against publisher record.
@Article{Reis:2007:BVD,
author =       "Charles Reis and John Dunagan and Helen J. Wang and
Opher Dubrovsky and Saher Esmeir",
title =        "{BrowserShield}: {Vulnerability}-driven filtering of
dynamic {HTML}",
journal =      j-TWEB,
volume =       "1",
number =       "3",
pages =        "11:1--11:??",
month =        sep,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1281480.1281481",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Vulnerability-driven filtering of network data can
offer a fast and easy-to-deploy alternative or
intermediary to software patching, as exemplified in
Shield's vision to a new domain, inspecting and
cleansing not just static content, but also dynamic
content. The dynamic content we target is the dynamic
HTML in Web pages, which have become a popular vector
for attacks. The key challenge in filtering dynamic
HTML is that it is undecidable to statically determine
whether an embedded script will exploit the browser at
runtime. We avoid this undecidability problem by
rewriting web pages and any embedded scripts into safe
equivalents, inserting checks so that the filtering is
done at runtime. The rewritten pages contain logic for
recursively applying runtime checks to dynamically
generated or modified web content, based on known
vulnerabilities. We have built and evaluated {\em
BrowserShield}, a general framework that performs this
dynamic instrumentation of embedded scripts, and that
admits policies for customized runtime actions like
vulnerability-driven filtering. We also explore other
applications on top of BrowserShield.",
acknowledgement = ack-nhfb,
articleno =    "11",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "code rewriting; JavaScript; vulnerability; Web
browser",
}

%% TWEB v1 n3 article 12.
%% Repair: restored the missing opening double backquote in
%% ``relevant'' (TeX quoting; the closing '' was present).
@Article{Sun:2007:MDW,
author =       "Zan Sun and Jalal Mahmud and I. V. Ramakrishnan and
Saikat Mukherjee",
title =        "Model-directed {Web} transactions under constrained
modalities",
journal =      j-TWEB,
volume =       "1",
number =       "3",
pages =        "12:1--12:??",
month =        sep,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1281480.1281482",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Online transactions (e.g., buying a book on the Web)
typically involve a number of steps spanning several
pages. Conducting such transactions under constrained
interaction modalities as exemplified by small screen
handhelds or interactive speech interfaces --- the
primary mode of communication for visually impaired
individuals --- is a strenuous, fatigue-inducing
activity. But usually one needs to browse only a small
fragment of a Web page to perform a transactional step
such as a form fillout, selecting an item from a search
results list, and so on. We exploit this observation to
develop an automata-based process model that delivers
only the ``relevant'' page fragments at each
transactional step, thereby reducing information
overload on such narrow interaction bandwidths. We
realize this model by coupling techniques from content
analysis of Web documents, automata learning and
statistical classification. The process model and
associated techniques have been incorporated into
Guide-O, a prototype system that facilitates online
transactions using speech/keyboard interface
(Guide-O-Speech), or with limited-display size
handhelds (Guide-O-Mobile). Performance of Guide-O and
its user experience are reported.",
acknowledgement = ack-nhfb,
articleno =    "12",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "assistive device; content adaption; Web transaction",
}

%% TWEB v1 n3 article 13.
%% Repair: the entry's "@Article{key," header line was missing (extraction
%% damage), leaving the fields outside any entry and silently ignored by
%% BibTeX. Key reconstructed per this file's name:year:abbrev scheme from
%% the title "Cache architecture for on-demand streaming on the Web".
%% NOTE(review): verify the key against the upstream tweb.bib before
%% citing, in case the original used a different abbreviation.
@Article{Sharman:2007:CAO,
author =       "Raj Sharman and Shiva Shankar Ramanna and Ram Ramesh
and Ram Gopal",
title =        "Cache architecture for on-demand streaming on the
{Web}",
journal =      j-TWEB,
volume =       "1",
number =       "3",
pages =        "13:1--13:??",
month =        sep,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1281480.1281483",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "On-demand streaming from a remote server through
best-effort Internet poses several challenges because
of network losses and variable delays. The primary
technique used to improve the quality of distributed
content service is replication. In the context of the
Internet, Web caching is the traditional mechanism that
delivery model for a distributed architecture in which
video is streamed from remote servers to edge caches
where the video is buffered and then streamed to the
client through a last-mile connection. The model uses a
novel revolving indexed cache buffer management
mechanism at the edge cache and employs selective
retransmissions of lost packets between the remote and
edge cache for a best-effort recovery of the losses.
The new Web cache buffer management scheme includes a
dynamic adjustment of cache buffer parameters based on
network conditions. In addition, performance of buffer
management and retransmission policies at the edge
cache is modeled and assessed using a probabilistic
analysis of the streaming process as well as system
simulations. The influence of different endogenous
control parameters on the quality of stream received by
the client is studied. Calibration curves on the QoS
metrics for different network conditions have been
obtained using simulations. Edge cache management can
be done using these calibration curves. ISPs can make
use of calibration curves to set the values of the
endogenous control parameters for specific QoS in
real-time streaming operations based on network
conditions. A methodology to benchmark transmission
characteristics using real-time traffic data is
developed to enable effective decision making on edge
cache buffer allocation and management strategies.",
acknowledgement = ack-nhfb,
articleno =    "13",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "buffering; edge cache; on-demand streaming; quality of
service; selective retransmissions; Web caching",
}

%%% Zdun/Hentrich/Dustdar, TWEB 1(3), article 14 (September 2007).
%%% CODEN "????" is the file-wide placeholder noted in the file header.
@Article{Zdun:2007:MPD,
author =       "Uwe Zdun and Carsten Hentrich and Schahram Dustdar",
title =        "Modeling process-driven and service-oriented
architectures using patterns and pattern primitives",
journal =      j-TWEB,
volume =       "1",
number =       "3",
pages =        "14:1--14:??",
month =        sep,
year =         "2007",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1281480.1281484",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Service-oriented architectures are increasingly used
in the context of business processes. However, the
proven practices for process-oriented integration of
services are not well documented yet. In addition,
modeling approaches for the integration of processes
and services are neither mature nor do they exactly
propose a pattern language for process-oriented
integration of services to describe the proven
practices. Our main contribution is a modeling concept
based on pattern primitives for these patterns. A
pattern primitive is a fundamental, precisely specified
modeling element that represents a pattern. We present
a catalog of pattern primitives that are precisely
modeled using OCL constraints and map these primitives
to the patterns in the pattern language of
process-oriented integration of services. We also
present a model validation tool that we have developed
to support modeling the process-oriented integration of
services, and an industrial case study in which we have
applied our results.",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "middleware; Service-oriented architecture; software
patterns",
}

%%% Editorial front matter: introduction to the special section on
%%% adversarial issues in Web search, TWEB 2(1), article 1 (February
%%% 2008).  No abstract or keywords fields, consistent with the other
%%% editorial introduction in this file (cf. Dustdar:2008:ISI).
@Article{Najork:2008:ISS,
author =       "Marc Najork and Brian D. Davison",
title =        "Introduction to special section on adversarial issues
in {Web} search",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "1:1--1:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326562",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

%%% Becchetti et al., TWEB 2(1), article 2 (February 2008): link-based
%%% Web spam detection.
%%% NOTE(review): no keywords field, unlike most sibling entries --
%%% optional in BibTeX, but worth completing from the ACM DL.
@Article{Becchetti:2008:LAW,
author =       "Luca Becchetti and Carlos Castillo and Debora Donato
and Ricardo Baeza-Yates and Stefano Leonardi",
title =        "Link analysis for {Web} spam detection",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "2:1--2:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326563",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We propose link-based techniques for automatic
detection of Web spam, a term referring to pages which
use deceptive techniques to obtain undeservedly high
scores in search engines. The use of Web spam is
widespread and difficult to solve, mostly due to the
large size of the Web which means that, in practice,
many algorithms are infeasible.\par

We perform a statistical analysis of a large collection
of Web pages. In particular, we compute statistics of
the links in the vicinity of every Web page applying
rank propagation and probabilistic counting over the
entire Web graph in a scalable way. These statistical
features are used to build Web spam classifiers which
only consider the link structure of the Web, regardless
of page contents. We then present a study of the
performance of each of the classifiers alone, as well
as their combined performance, by testing them over a
large collection of Web link spam. After tenfold
cross-validation, our best classifiers have a
performance comparable to that of state-of-the-art spam
classifiers that use content attributes, but are
orthogonal to content-based methods.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

%%% NOTE(review): restored the missing opening LaTeX quotes in
%%% ``look and feel'' below -- the closing '' was present but
%%% unbalanced, which typesets as a stray right quote.
@Article{Urvoy:2008:TWS,
author =       "Tanguy Urvoy and Emmanuel Chauveau and Pascal Filoche
and Thomas Lavergne",
title =        "Tracking {Web} spam with {HTML} style similarities",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "3:1--3:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326564",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Automatically generated content is ubiquitous in the
web: dynamic sites built using the three-tier paradigm
are good examples (e.g., commercial sites, blogs and
other sites edited using web authoring software), as
well as less legitimate spamdexing attempts (e.g., link
farms, faked directories).\par

Those pages built using the same generating method
(template or script) share a common ``look and feel''
that is not easily detected by common text
classification methods, but is more related to
stylometry.\par

In this work we study and compare several HTML style
similarity measures based on both textual and
extra-textual features in HTML source code. We also
propose a flexible algorithm to cluster a large
collection of documents according to these measures.
Since the proposed algorithm is based on locality
sensitive hashing (LSH), we first review this
technique.\par

We then describe how to use the HTML style similarity
clusters to pinpoint dubious pages and enhance the
quality of spam classifiers. We present an evaluation
of our algorithm on the WEBSPAM-UK2006 dataset.",
acknowledgement = ack-nhfb,
articleno =    "3",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Clustering; document similarity; search engine spam;
stylometry; templates identification",
}

%%% NOTE(review): the 'abstract = "' field opener was missing here
%%% (the text began mid-sentence with "(splog) detection ..."), which
%%% makes the entry unparseable.  The opening words are restored from
%%% the authors' published abstract -- verify against the ACM DL.
%%% NOTE(review): text also appears truncated between "feature vector"
%%% and "the blog." further down -- verify against the ACM DL.
@Article{Lin:2008:DST,
author =       "Yu-Ru Lin and Hari Sundaram and Yun Chi and Junichi
Tatemura and Belle L. Tseng",
title =        "Detecting splogs via temporal dynamics using
self-similarity analysis",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "4:1--4:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326565",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "This paper focuses on spam blog
(splog) detection using temporal and structural
regularity of content, post time and links. Splogs are
undesirable blogs meant to attract search engine
traffic, used solely for promoting affiliate sites.
Blogs represent popular online media, and splogs not
only degrade the quality of search engine results, but
also waste network resources. The splog detection
problem is made difficult due to the lack of stable
content descriptors.\par

We have developed a new technique for detecting splogs,
based on the observation that a blog is a dynamic,
growing sequence of entries (or posts) rather than a
collection of individual pages. In our approach, splogs
are recognized by their temporal characteristics and
content. There are three key ideas in our splog
detection framework. (a) We represent the blog temporal
dynamics using self-similarity matrices defined on the
histogram intersection similarity measure of the time,
content, and link attributes of posts, to investigate
the temporal changes of the post sequence. (b) We study
the blog temporal characteristics using a visual
representation derived from the self-similarity
measures. The visual signature reveals correlation
between attributes and posts, depending on the type of
blogs (normal blogs and splogs). (c) We propose two
types of novel temporal features to capture the splog
temporal characteristics. In our splog detector, these
novel features are combined with content based
features. We extract a content based feature vector
the blog. The dimensionality of the feature vector is
reduced by Fisher linear discriminant analysis. We have
tested an SVM-based splog detector using proposed
features on real world datasets, with appreciable
results (90\% accuracy).",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Blogs; regularity; self-similarity; spam; splog
detection; temporal dynamics; topology",
}

%%% Weinreich/Obendorf/Herder/Mayer, TWEB 2(1), article 5 (February
%%% 2008): long-term client-side Web usage study.
@Article{Weinreich:2008:QAE,
author =       "Harald Weinreich and Hartmut Obendorf and Eelco Herder
and Matthias Mayer",
title =        "Not quite the average: an empirical study of {Web}
use",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "5:1--5:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326566",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In the past decade, the World Wide Web has been
subject to dramatic changes. Web sites have evolved
from static information resources to dynamic and
interactive applications that are used for a broad
scope of activities on a daily basis. To examine the
consequences of these changes on user behavior, we
conducted a long-term client-side Web usage study with
twenty-five participants. This report presents results
of this study and compares the user behavior with
previous long-term browser usage studies, which range
in age from seven to thirteen years. Based on the
empirical data and the interview results, various
implications for the interface design of browsers and
Web sites are discussed.\par

A major finding is the decreasing prominence of
backtracking in Web navigation. This can largely be
attributed to the increasing importance of dynamic,
service-oriented Web sites. Users do not navigate on
these sites searching for information, but rather
interact with an online application to complete certain
tasks. Furthermore, the usage of multiple windows and
tabs has partly replaced back button usage, posing new
challenges for user orientation and backtracking. We
found that Web browsing is a rapid activity even for
pages with substantial content, which calls for page
designs that allow for cursory reading. Click maps
provide additional information on how users interact
with the Web on page level. Finally, substantial
differences were observed between users, and
characteristic usage patterns for different types of
Web sites emphasize the need for more adaptive and
customizable Web browsers.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "browser interfaces; hypertext; Navigation; usability;
user study; Web; web browsing; web design; WWW",
}

%%% Yu and Bouguettaya, TWEB 2(1), article 6 (February 2008): Web
%%% service query algebra and optimization.
@Article{Yu:2008:FWS,
author =       "Qi Yu and Athman Bouguettaya",
title =        "Framework for {Web} service query algebra and
optimization",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "6:1--6:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326567",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We present a query algebra that supports optimized
access of Web services through service-oriented
queries. The service query algebra is defined based on
a formal service model that provides a high-level
abstraction of Web services across an application
domain. The algebra defines a set of algebraic
operators. Algebraic service queries can be formulated
using these operators. This allows users to query their
desired services based on both functionality and
quality. We provide the implementation of each
algebraic operator. This enables the generation of
Service Execution Plans (SEPs) that can be used by
users to directly access services. We present an
optimization algorithm by extending the Dynamic
Programming (DP) approach to efficiently select the
SEPs with the best user-desired quality. The
experimental study validates the proposed algorithm by
demonstrating significant performance improvement
compared with the traditional DP approach.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "query optimization; service oriented computing;
service query; Web service",
}

%%% NOTE(review): the 'abstract = "' field opener was dropped here
%%% (the text began mid-sentence), which makes the entry unparseable.
%%% The field name and opening quote are restored so the entry parses
%%% again, but the abstract's leading words are still missing --
%%% recover them from the ACM DL.
%%% NOTE(review): a further apparent truncation sits between "scalable
%%% Semantic Web" and "connecting-the-dots problems" -- verify.
@Article{Aleman-Meza:2008:SSA,
author =       "Boanerges Aleman-Meza and Meenakshi Nagarajan and Li
Ding and Amit Sheth and I. Budak Arpinar and Anupam
Joshi and Tim Finin",
title =        "Scalable semantic analytics on social networks for
addressing the problem of conflict of interest
detection",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "7:1--7:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326568",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "semantic techniques for detection of Conflict of
Interest (COI). We explain the common challenges
involved in building scalable Semantic Web
connecting-the-dots problems. We describe in detail the
challenges involved in two important aspects on
building Semantic Web applications, namely, data
acquisition and entity disambiguation (or reference
reconciliation). We extend upon our previous work where
we integrated the collaborative network of a subset of
DBLP researchers with persons in a Friend-of-a-Friend
social network (FOAF). Our method finds the connections
between people, measures collaboration strength, and
includes heuristics that use friendship/affiliation
information to provide an estimate of potential COI in
a peer-review scenario. Evaluations are presented by
measuring what could have been the COI between accepted
papers in various conference tracks and their
respective program committee members. The experimental
results demonstrate that scalability can be achieved by
using a dataset of over 3 million entities (all
bibliographic data from DBLP and a large collection of
FOAF documents).",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "conflict of interest; data fusion; DBLP; entity
disambiguation; ontologies; peer review process; RDF;
semantic analytics; semantic associations; Semantic
Web; social networks; swetoDblp",
}

%%% Gmach et al., TWEB 2(1), article 8 (February 2008).
%%% NOTE(review): the abstract appears to drop text between "service
%%% oriented architectures" and "management concepts in order to
%%% achieve" (plausibly "require adaptive") -- verify against the ACM
%%% Digital Library before relying on the abstract text.
@Article{Gmach:2008:AQS,
author =       "Daniel Gmach and Stefan Krompass and Andreas Scholz
and Martin Wimmer and Alfons Kemper",
title =        "Adaptive quality of service management for enterprise
services",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "8:1--8:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326569",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In the past, enterprise resource planning systems were
designed as monolithic software systems running on
centralized mainframes. Today, these systems are
(re-)designed as a repository of enterprise services
that are distributed throughout the available computing
infrastructure. These service oriented architectures
management concepts in order to achieve a high quality
of service level in terms of, for example,
availability, responsiveness, and throughput. The
adaptive management has to allocate service instances
to computing resources, adapt the resource allocation
to unforeseen load fluctuations, and intelligently
schedule individual requests to guarantee negotiated
service level agreements (SLAs). Our AutoGlobe platform
provides such a comprehensive adaptive service
management comprising\par

--- static service-to-server allocation based on
automatically detected service utilization
patterns,\par

--- adaptive service management based on a fuzzy
controller that remedies exceptional situations by
automatically initiating, for example, service
migration, service replication (scale-out), and\par

--- adaptive scheduling of individual service requests
that prioritizes requests depending on the current
degree of service level conformance.\par

All three complementary control components are
described in detail, and their effectiveness is
analyzed by means of realistic business application
scenarios.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "fuzzy controller; Quality of service; workload
characterization",
}

%%% Yang and Liu, TWEB 2(1), article 9 (February 2008): network
%%% community discovery via clustering centrality (ICS algorithm).
@Article{Yang:2008:DGN,
author =       "Bo Yang and Jiming Liu",
title =        "Discovering global network communities based on local
centralities",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "9:1--9:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1326561.1326570",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "One of the central problems in studying and
understanding complex networks, such as online social
networks or World Wide Web, is to discover hidden,
either physically (e.g., interactions or hyperlinks) or
logically (e.g., profiles or semantics) well-defined
topological structures. From a practical point of view,
a good example of such structures would be so-called
network communities. Earlier studies have introduced
various formulations as well as methods for the problem
of identifying or extracting communities. While each of
them has pros and cons as far as the effectiveness and
efficiency are concerned, almost none of them has
explicitly dealt with the potential relationship
between the global topological property of a network
and the local property of individual nodes. In order to
study this problem, this paper presents a new
algorithm, called ICS, which aims to discover natural
network communities by inferring from the local
information of nodes inherently hidden in networks
based on a new centrality, that is, clustering
centrality, which is a generalization of eigenvector
centrality. As compared with existing methods, our
method runs efficiently with a good clustering
performance. Additionally, it is insensitive to its
built-in parameters and prior knowledge.",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "centrality; community mining; Complex network; graph
theory; World Wide Web",
}

%%% Editorial front matter: special-issue introduction, TWEB 2(2),
%%% article 10 (April 2008); no abstract or keywords, as for
%%% Najork:2008:ISS.
%%% NOTE(review): DOI prefix "1346337" here disagrees with the
%%% "1346237" prefix used by the other TWEB 2(2) entries in this file
%%% (Belhajjame/Elgedawy/Ryu/Schafer); one of the two is likely a
%%% typo -- verify against the ACM DL.
@Article{Dustdar:2008:ISI,
author =       "Schahram Dustdar and Bernd J. Kr{\"a}mer",
title =        "Introduction to special issue on service oriented
computing {(SOC)}",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "10:1--10:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1346337.1346338",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "10",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

%%% Belhajjame et al., TWEB 2(2), article 11 (April 2008): automatic
%%% semantic annotation of Web services from workflow definitions.
%%% NOTE(review): DOI prefix "1346237" here disagrees with the
%%% "1346337" prefix in Dustdar:2008:ISI for the same issue; one of
%%% the two is likely a typo -- verify against the ACM DL.
@Article{Belhajjame:2008:AAW,
author =       "Khalid Belhajjame and Suzanne M. Embury and Norman W.
Paton and Robert Stevens and Carole A. Goble",
title =        "Automatic annotation of {Web} services based on
workflow definitions",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "11:1--11:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1346237.1346239",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Semantic annotations of web services can support the
effective and efficient discovery of services, and
guide their composition into workflows. At present,
however, the practical utility of such annotations is
limited by the small number of service annotations
available for general use. Manual annotation of
services is a time consuming and thus expensive task,
so some means are required by which services can be
automatically (or semi-automatically) annotated. In
this paper, we show how information can be inferred
about the semantics of operation parameters based on
their connections to other (annotated) operation
parameters within tried-and-tested workflows. Because
the data links in the workflows do not necessarily
contain every possible connection of compatible
parameters, we can infer only constraints on the
semantics of parameters. We show that despite their
imprecise nature these so-called {\em loose
annotations\/} are still of value in supporting the
manual annotation task, inspecting workflows and
discovering services. We also show that derived
annotations for already annotated parameters are
useful. By comparing existing and newly derived
annotations of operation parameters, we can support the
detection of errors in existing annotations, the
ontology used for annotation and in workflows. The
derivation mechanism has been implemented, and its
practical applicability for inferring new annotations
has been established through an experimental
evaluation. The usefulness of the derived annotations
is also demonstrated.",
acknowledgement = ack-nhfb,
articleno =    "11",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "automatic annotation; ontologies; semantic
annotations; Semantic web services; workflows",
}

%%% Elgedawy/Tari/Thom, TWEB 2(2), article 12 (April 2008).
%%% NOTE(review): the abstract appears to drop text between "and
%%% external" and "structures, algorithms, and theorems" (plausibly
%%% the close of the parenthesis, e.g. "behaviors). We propose the
%%% models,") -- verify against the ACM Digital Library.
@Article{Elgedawy:2008:CAH,
author =       "Islam Elgedawy and Zahir Tari and James A. Thom",
title =        "Correctness-aware high-level functional matching
approaches for semantic {Web} services",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "12:1--12:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1346237.1346240",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Service matching approaches trade precision for
recall, creating the need for users to choose the
correct services, which obviously is a major obstacle
for automating the service discovery and aggregation
processes. Our approach to overcome this problem, is to
eliminate the appearance of false positives by
returning only the correct services. As different users
have different semantics for what is correct, we argue
that the correctness of the matching results must be
determined according to the achievement of users'
goals: that only services achieving users' goals are
considered correct. To determine such correctness, we
argue that the matching process should be based
primarily on the high-level functional specifications
(namely goals, achievement contexts, and external
structures, algorithms, and theorems required to
correctly match such specifications. We propose a model
called $G^+$, to capture such specifications, for
both services and users, in a machine-understandable
format. We propose a data structure, called a Concepts
Substitutability Graph (CSG), to capture the
substitution semantics of application domain concepts
in a context-based manner, in order to determine the
semantic-preserving mapping transformations required to
match different {\em G\/}$^+$ models. We also propose a
behavior matching approach that is able to match states
in an m-to-n manner, such that behavior models with
different numbers of state transitions can be matched.
Finally, we show how services are matched and
aggregated according to their $G^+$ models. Results
of supporting experiments demonstrate the advantages of
the proposed service matching approaches.",
acknowledgement = ack-nhfb,
articleno =    "12",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "High-level functional matching; semantic Web services;
service aggregation",
}

%%% Ryu et al., TWEB 2(2), article 13 (April 2008).
%%% NOTE(review): the abstract appears to drop text between "of
%%% work). In" and "service managers in managing" (plausibly "this
%%% paper, we present a framework that supports ...") -- verify
%%% against the ACM Digital Library.
@Article{Ryu:2008:SDE,
author =       "Seung Hwan Ryu and Fabio Casati and Halvard Skogsrud
and Boualem Benatallah and R{\'e}gis Saint-Paul",
title =        "Supporting the dynamic evolution of {Web} service
protocols in service-oriented architectures",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "13:1--13:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1346237.1346241",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In service-oriented architectures, everything is a
service and everyone is a service provider. Web
services (or simply services) are loosely coupled
software components that are published, discovered, and
invoked across the Web. As the use of Web service
grows, in order to correctly interact with them, it is
important to understand the business protocols that
provide clients with the information on how to interact
with services. In dynamic Web service environments,
service providers need to constantly adapt their
business protocols for reflecting the restrictions and
requirements proposed by new applications, new business
strategies, and new laws, or for fixing problems found
in the protocol definition. However, the effective
management of such a protocol evolution raises critical
problems: one of the most critical issues is how to
handle instances running under the old protocol when it
has been changed. Simple solutions, such as aborting
them or allowing them to continue to run according to
the old protocol, can be considered, but they are
inapplicable for many reasons (for example, the loss of
work already done and the critical nature of work). In
service managers in managing the business protocol
evolution by providing several features, such as a
variety of protocol change impact analyses
automatically determining which ongoing instances can
be migrated to the new version of protocol, and data
mining techniques inferring interaction patterns used
for classifying ongoing instances migrateable to the
new protocol. To support the protocol evolution
process, we have also developed database-backed GUI
tools on top of our existing system. The proposed
approach and tools can help service managers in
managing the evolution of ongoing instances when the
business protocols of services with which they are
interacting have changed.",
acknowledgement = ack-nhfb,
articleno =    "13",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Business protocols; change impact analysis; decision
trees; dynamic evolution; ongoing instances; Web
services",
}

%%% NOTE(review): the 'abstract = "' field opener was dropped here
%%% (the text began mid-sentence), which makes the entry unparseable.
%%% The field name and opening quote are restored so the entry parses
%%% again, but the abstract's leading words are still missing --
%%% recover them from the ACM DL.
%%% NOTE(review): a further apparent truncation sits between "are
%%% usually" and "introduce an environment" -- verify.
@Article{Schafer:2008:EFA,
author =       "Michael Sch{\"a}fer and Peter Dolog and Wolfgang
Nejdl",
title =        "An environment for flexible advanced compensations of
{Web} service transactions",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "14:1--14:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1346237.1346242",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "performed by employing Web service environments.
Moreover, such environments are being provided by major
players on the technology markets. Those environments
are based on open specifications for transaction
coordination. When a failure in such an environment
occurs, a compensation can be initiated to recover from
the failure. However, current environments have only
limited capabilities for compensations, and are usually
introduce an environment to deal with advanced
compensations based on forward recovery principles. We
extend the existing Web service transaction
coordination architecture and infrastructure in order
to support flexible compensation operations. We use a
contract-based approach, which allows the specification
of permitted compensations at runtime. We introduce
{\em abstract service\/} and {\em adapter\/}
components, which allow us to separate the compensation
logic from the coordination logic. In this way, we can
easily plug in or plug out different compensation
strategies based on a specification language defined on
top of basic compensation activities and complex
compensation types. Experiments with our approach and
environment show that such an approach to compensation
is feasible and beneficial. Additionally, we introduce
a cost-benefit model to evaluate the proposed
environment based on net value analysis. The evaluation
shows in which circumstances the environment is
economical.",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "compensations; forward-recovery; transactions; Web
services",
}

@Article{Srivatsa:2008:MAL,
author =       "Mudhakar Srivatsa and Arun Iyengar and Jian Yin and
Ling Liu",
title =        "Mitigating application-level denial of service attacks
on {Web} servers: a client-transparent approach",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "15:1--15:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1377488.1377489",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Recently, we have seen increasing numbers of denial of
service (DoS) attacks against online services and Web
applications either for extortion reasons or for
impairing and even disabling the competition. These DoS
attacks have increasingly targeted the application
level. Application-level DoS attacks emulate the same
request syntax and network-level traffic
characteristics as those of legitimate clients, thereby
making the attacks much harder to detect and counter.
Moreover, such attacks often target bottleneck
resources such as disk bandwidth, database bandwidth,
DoS attacks by using a twofold mechanism. First, we
perform admission control to limit the number of
concurrent clients served by the online service.
Admission control is based on port hiding that renders
the online service invisible to unauthorized clients by
hiding the port number on which the service accepts
incoming requests. Second, we perform congestion
control on admitted clients to allocate more resources
to good clients. Congestion control is achieved by
adaptively setting a client's priority level in
response to the client's requests in a way that can
incorporate application-level semantics. We present a
detailed evaluation of the proposed solution using two
sample applications: Apache HTTPD and the TPCW
benchmark (running on Apache Tomcat and IBM DB2). Our
experiments show that the proposed solution incurs low
performance overhead and is resilient to DoS attacks.",
acknowledgement = ack-nhfb,
articleno =    "15",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "client transparency; DoS Attacks; game theory; Web
servers",
}

@Article{White:2008:LPD,
author =       "Ryen W. White and Mikhail Bilenko and Silviu
Cucerzan",
title =        "Leveraging popular destinations to enhance {Web}
search interaction",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "16:1--16:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1377488.1377490",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "feature that for a given query provides links to Web
sites frequently visited by other users with similar
information needs. These popular destinations
complement traditional search results, allowing direct
navigation to authoritative resources for the query
topic. Destinations are identified using the history of
the search and browsing behavior of many users over an
extended time period, and their collective behavior
provides a basis for computing source authority. They
are drawn from the end of users' postquery browse
trails where users may cease searching once they find
relevant information. We describe a user study that
compared the suggestion of destinations with the
previously proposed suggestion of related queries as
well as with traditional, unaided Web search. Results
show that search enhanced by query suggestions
outperforms other systems in terms of subject
perceptions and search effectiveness for fact-finding
search tasks. However, search enhanced by destination
suggestions performs best for exploratory tasks with
its best performance obtained from mining past user
behavior at query-level granularity. We discuss the
implications of these and other findings from our study
for the design of search systems that utilize user
behavior, in particular, user browse trails and popular
destinations.",
acknowledgement = ack-nhfb,
articleno =    "16",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "enhanced Web search; search destinations; User
studies",
}

@Article{Andreolini:2008:MFS,
author =       "Mauro Andreolini and Sara Casolari and Michele
Colajanni",
title =        "Models and framework for supporting runtime decisions
in {Web-based} systems",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "17:1--17:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1377488.1377491",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Efficient management of distributed Web-based systems
requires several mechanisms that decide on request
redirection. The algorithms behind these mechanisms
typically make fast decisions on the basis of the load
conditions of the system resources. The architecture
complexity and workloads characterizing most Web-based
services make it extremely difficult to deduce a
representative view of a resource load from collected
measures that show extreme variability even at
different time scales. Hence, any decision based on
instantaneous or average views of the system load may
lead to useless or even wrong actions. As an
alternative, we propose a two-phase strategy that first
aims to obtain a representative view of the load trend
from measured system values and then applies this
representation to support runtime decision systems. We
consider two classical problems behind decisions: how
to detect significant and nontransient load changes of
a system resource and how to predict its future load
behavior. The two-phase strategy is based on stochastic
functions that are characterized by a computational
complexity that is compatible with runtime decisions.
We describe, test, and tune the two-phase strategy by
considering as a first example a multitier Web-based
system that is subject to different classes of
realistic and synthetic workloads. Also, we integrate
the proposed strategy into a framework that we validate
by applying it to support runtime decisions in a
cluster Web system and in a locally distributed Network
Intrusion Detection System.",
acknowledgement = ack-nhfb,
articleno =    "17",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "prediction; load representation; World Wide Web",
}

@Article{Amitay:2008:ISI,
author =       "Einat Amitay and Andrei Broder",
title =        "Introduction to special issue on query log analysis:
{Technology} and ethics",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "18:1--18:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1409220.1409221",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "18",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cooper:2008:SQL,
author =       "Alissa Cooper",
title =        "A survey of query log privacy-enhancing techniques
from a policy perspective",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "19:1--19:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1409220.1409222",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "As popular search engines face the sometimes
conflicting interests of protecting privacy while
retaining query logs for a variety of uses, numerous
technical measures have been suggested to both enhance
privacy and preserve at least a portion of the utility
these techniques against three sets of criteria: (1)
how well the technique protects privacy, (2) how well
the technique preserves the utility of the query logs,
and (3) how well the technique might be implemented as
a user control. A user control is defined as a
mechanism that allows individual Internet users to
choose to have the technique applied to their own query
logs.",
acknowledgement = ack-nhfb,
articleno =    "19",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "log; policy; Privacy; query; search",
}

@Article{Baeza-Yates:2008:DTO,
author =       "Ricardo Baeza-Yates and Aristides Gionis and Flavio P.
Junqueira and Vanessa Murdock and Vassilis Plachouras
and Fabrizio Silvestri",
title =        "Design trade-offs for search engine caching",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "20:1--20:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1409220.1409223",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "efficient caching systems for Web search engines. We
explore the impact of different approaches, such as
static vs. dynamic caching, and caching query results
vs. caching posting lists. Using a query log spanning a
whole year, we explore the limitations of caching and
we demonstrate that caching posting lists can achieve
higher hit rates than caching query answers. We propose
a new algorithm for static caching of posting lists,
which outperforms previous methods. We also study the
problem of finding the optimal way to split the static
cache between answers and posting lists. Finally, we
effectiveness of static caching, given our observation
that the distribution of the queries changes slowly
over time. Our results and observations are applicable
to different levels of the data-access hierarchy, for
instance, for a memory/disk layer or a broker/remote
server layer.",
acknowledgement = ack-nhfb,
articleno =    "20",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Caching; query logs; Web search",
}

@Article{Richardson:2008:LAW,
author =       "Matthew Richardson",
title =        "Learning about the world through long-term query
logs",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "21:1--21:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1409220.1409224",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "query logs. Most work on query logs to date considers
only short-term (within-session) query information. In
contrast, we show that long-term query logs can be used
to learn about the world we live in. There are many
applications of this that lead not only to improving
the search engine for its users, but also potentially
to advances in other disciplines such as medicine,
will show how long-term query logs can be used for
these purposes, and that their potential is severely
reduced if the logs are limited to short time horizons.
We show that query effects are long-lasting, provide
valuable information, and might be used to
automatically make medical discoveries, build concept
hierarchies, and generally learn about the sociological
behavior of users. We believe these applications are
only the beginning of what can be done with the
information contained in long-term query logs, and see
this work as a step toward unlocking their potential.",
acknowledgement = ack-nhfb,
articleno =    "21",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "data mining; knowledge discovery; query logs; user
behavior",
}

@Article{Koutrika:2008:CST,
author =       "Georgia Koutrika and Frans Adjie Effendi and
Zolt{\'a}n Gy{\"o}ngyi and Paul Heymann and Hector
Garcia-Molina",
title =        "Combating spam in tagging systems: an evaluation",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "22:1--22:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1409220.1409225",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Tagging systems allow users to interactively annotate
a pool of shared resources using descriptive strings
called {\em tags}. Tags are used to guide users to
interesting resources and help them build communities
that share their expertise and resources. As tagging
systems are gaining in popularity, they become more
susceptible to {\em tag spam\/}: misleading tags that
are generated in order to increase the visibility of
some resources or simply to confuse users. Our goal is
to understand this problem better. In particular, we
are interested in answers to questions such as: How
many malicious users can a tagging system tolerate
before results significantly degrade? What types of
tagging systems are more vulnerable to malicious
attacks? What would be the effort and the impact of
employing a trusted moderator to find bad postings? Can
a system automatically protect itself from spam, for
instance, by exploiting user tag patterns? In a quest
for answers to these questions, we introduce a
framework for modeling tagging systems and user tagging
behavior. We also describe a method for ranking
documents matching a tag based on taggers' reliability.
Using our framework, we study the behavior of existing
approaches under malicious attacks and the impact of a
moderator and our ranking method.",
acknowledgement = ack-nhfb,
articleno =    "22",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "bookmarking systems; tag spam; Tagging; tagging
models",
}

@Article{Rattenbury:2009:MEP,
author =       "Tye Rattenbury and Mor Naaman",
title =        "Methods for extracting place semantics from {Flickr}
tags",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "1:1--1:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1462148.1462149",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We describe an approach for extracting semantics for
tags, unstructured text-labels assigned to resources on
the Web, based on each tag's usage patterns. In
particular, we focus on the problem of extracting place
semantics for tags that are assigned to photos on
Flickr, a popular-photo sharing Web site that supports
location (latitude/longitude) metadata for photos. We
propose the adaptation of two baseline methods,
inspired by well-known burst-analysis techniques, for
the task; we also describe two novel methods, TagMaps
and scale-structure identification. We evaluate the
methods on a subset of Flickr data. We show that our
scale-structure identification method outperforms
existing techniques and that a hybrid approach
generates further improvements (achieving 85\%
precision at 81\% recall). The approach and methods
described in this work can be used in other domains
such as geo-annotated Web pages, where text terms can
be extracted and associated with usage patterns.",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "places; semantics; Tagging systems; tags",
}

@Article{Jackson:2009:PBD,
author =       "Collin Jackson and Adam Barth and Andrew Bortz and
Weidong Shao and Dan Boneh",
title =        "Protecting browsers from {DNS} rebinding attacks",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "2:1--2:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1462148.1462150",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "DNS rebinding attacks subvert the same-origin policy
of browsers, converting them into open network proxies.
Using DNS rebinding, an attacker can circumvent
organizational and personal firewalls, send spam email,
and defraud pay-per-click advertisers. We evaluate the
cost effectiveness of mounting DNS rebinding attacks,
finding that an attacker requires less than \$100 to
hijack 100,000 IP addresses. We analyze defenses to DNS
rebinding attacks, including improvements to the
classic ``DNS pinning,'' and recommend changes to
browser plug-ins, firewalls, and Web servers. Our
defenses have been adopted by plug-in vendors and by a
number of open-source firewall implementations.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "click fraud; DNS; firewall; Same-origin policy; spam",
}

@Article{Bar-Yossef:2009:DCD,
author =       "Ziv Bar-Yossef and Idit Keidar and Uri Schonfeld",
title =        "Do not crawl in the {DUST}: {Different URLs with
Similar Text}",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "3:1--3:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/1462148.1462151",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We consider the problem of DUST: Different URLs with
Similar Text. Such duplicate URLs are prevalent in Web
sites, as Web server software often uses aliases and
redirections, and dynamically generates the same page
from various different URL requests. We present a novel
algorithm, {\em DustBuster}, for uncovering DUST; that
is, for discovering rules that transform a given URL to
others that are likely to have similar content.
DustBuster mines DUST effectively from previous crawl
logs or Web server logs, {\em without\/} examining page
contents. Verifying these rules via sampling requires
fetching few actual Web pages.
Search engines can benefit from information about DUST to increase the effectiveness of crawling, reduce indexing overhead, and improve the quality of popularity statistics such as PageRank.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "antialiasing; crawling; duplicate detection; Search engines; URL normalization", } @Article{Xiao:2009:BSD, author = "Xiangye Xiao and Qiong Luo and Dan Hong and Hongbo Fu and Xing Xie and Wei-Ying Ma", title = "Browsing on small displays by transforming {Web} pages into hierarchically structured subpages", journal = j-TWEB, volume = "3", number = "1", pages = "4:1--4:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1462148.1462152", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:15 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a new Web page transformation method to facilitate Web browsing on handheld devices such as Personal Digital Assistants (PDAs). In our approach, an original Web page that does not fit on the screen is transformed into a set of subpages, each of which fits on the screen. This transformation is done through slicing the original page into page blocks iteratively, with several factors considered. These factors include the size of the screen, the size of each page block, the number of blocks in each transformed page, the depth of the tree hierarchy that the transformed pages form, as well as the semantic coherence between blocks. We call the tree hierarchy of the transformed pages an SP-tree. In an SP-tree, an internal node consists of a textually enhanced thumbnail image with hyperlinks, and a leaf node is a block extracted from a subpage of the original Web page. 
We adaptively adjust the fanout and the height of the SP-tree so that each thumbnail image is clear enough for users to read, while at the same time, the number of clicks needed to reach a leaf page is few. Through this transformation algorithm, we preserve the contextual information in the original Web page and reduce scrolling. We have implemented this transformation module on a proxy server and have conducted usability studies on its performance. Our system achieved a shorter task completion time compared with that of transformations from the Opera browser in nine of ten tasks. The average improvement on familiar pages was 44\%. The average improvement on unfamiliar pages was 37\%. Subjective responses were positive.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Proxy; slicing tree; small displays; thumbnails; Web browsing; Web page adaptation", } @Article{Gabrilovich:2009:CSQ, author = "Evgeniy Gabrilovich and Andrei Broder and Marcus Fontoura and Amruta Joshi and Vanja Josifovski and Lance Riedel and Tong Zhang", title = "Classifying search queries using the {Web} as a source of knowledge", journal = j-TWEB, volume = "3", number = "2", pages = "5:1--5:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513877", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a methodology for building a robust query classification system that can identify thousands of query classes, while dealing in real time with the query volume of a commercial Web search engine. We use a pseudo relevance feedback technique: given a query, we determine its topic by classifying the Web search results retrieved by the query. 
Motivated by the needs of search advertising, we primarily focus on rare queries, which are the hardest from the point of view of machine learning, yet in aggregate account for a considerable fraction of search engine traffic. Empirical evaluation confirms that our methodology yields a considerably higher classification accuracy than previously reported. We believe that the proposed methodology will lead to better matching of online ads to rare queries and overall to a better user experience.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Pseudo relevance feedback; query classification; Web search", } @Article{Reay:2009:LSE, author = "Ian Reay and Scott Dick and James Miller", title = "A large-scale empirical study of {P3P} privacy policies: {Stated} actions vs. legal obligations", journal = j-TWEB, volume = "3", number = "2", pages = "6:1--6:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513878", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Numerous studies over the past ten years have shown that concern for personal privacy is a major impediment to the growth of e-commerce. These concerns are so serious that most if not all consumer watchdog groups have called for some form of privacy protection for Internet users. In response, many nations around the world, including all European Union nations, Canada, Japan, and Australia, have enacted national legislation establishing mandatory safeguards for personal privacy. However, recent evidence indicates that Web sites might not be adhering to the requirements of this legislation. 
The goal of this study is to examine the posted privacy policies of Web sites, and compare these statements to the legal mandates under which the Web sites operate. We harvested all available P3P (Platform for Privacy Preferences Protocol) documents from the 100,000 most popular Web sites (over 3,000 full policies, and another 3,000 compact policies). This allows us to undertake an automated analysis of adherence to legal mandates on Web sites that most impact the average Internet user. Our findings show that Web sites generally do not even claim to follow all the privacy-protection mandates in their legal jurisdiction (we do not examine actual practice, only posted policies). Furthermore, this general statement appears to be true for every jurisdiction with privacy laws and any significant number of P3P policies, including European Union nations, Canada, Australia, and Web sites in the USA Safe Harbor program.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "electronic commerce; legislation and enforcement; P3P; Privacy protection", } @Article{Dourisboure:2009:ECD, author = "Yon Dourisboure and Filippo Geraci and Marco Pellegrini", title = "Extraction and classification of dense implicit communities in the {Web} graph", journal = j-TWEB, volume = "3", number = "2", pages = "7:1--7:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513879", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The World Wide Web (WWW) is rapidly becoming important for society as a medium for sharing data, information, and services, and there is a growing interest in tools for understanding collective behavior and emerging phenomena in the WWW. 
In this article we focus on the problem of searching and classifying {\em communities\/} in the Web. Loosely speaking a community is a group of pages related to a common interest. More formally, communities have been associated in the computer science literature with the existence of a locally dense subgraph of the Web graph (where Web pages are nodes and hyperlinks are arcs of the Web graph). The core of our contribution is a new scalable algorithm for finding relatively dense subgraphs in massive graphs. We apply our algorithm on Web graphs built on three publicly available large crawls of the Web (with raw sizes up to 120M nodes and 1G arcs). The effectiveness of our algorithm in finding dense subgraphs is demonstrated experimentally by embedding artificial communities in the Web graph and counting how many of these are blindly found. Effectiveness increases with the size and density of the communities: it is close to 100\% for communities of thirty nodes or more (even at low density). It is still about 80\% even for communities of twenty nodes with density over 50\% of the arcs present. At the lower extremes the algorithm catches 35\% of dense communities made of ten nodes. We also develop some sufficient conditions for the detection of a community under some local graph models and not-too-restrictive hypotheses. 
We complete our {\em Community Watch\/} system by clustering the communities found in the Web graph into homogeneous groups by topic and labeling each group by representative keywords.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "communities; detection of dense subgraph; Web graph", } @Article{Lee:2009:ISB, author = "Hsin-Tsang Lee and Derek Leonard and Xiaoming Wang and Dmitri Loguinov", title = "{IRLbot}: {Scaling} to 6 billion pages and beyond", journal = j-TWEB, volume = "3", number = "3", pages = "8:1--8:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541823", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article shares our experience in designing a Web crawler that can download billions of pages using a single-server implementation and models its performance. We first show that current crawling algorithms cannot effectively cope with the sheer volume of URLs generated in large crawls, highly branching spam, legitimate multimillion-page blog sites, and infinite loops created by server-side scripts. We then offer a set of techniques for dealing with these issues and test their performance in an implementation we call IRLbot. In our recent experiment that lasted 41 days, IRLbot running on a single server successfully crawled 6.3 billion valid HTML pages (7.6 billion connection requests) and sustained an average download rate of 319 mb/s (1,789 pages/s). 
Unlike our prior experiments with algorithms proposed in related work, this version of IRLbot did not experience any bottlenecks and successfully handled content from over 117 million hosts, parsed out 394 billion links, and discovered a subset of the Web graph with 41 billion unique nodes.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "crawling; IRLbot; large scale", } @Article{Tappenden:2009:CDS, author = "Andrew F. Tappenden and James Miller", title = "Cookies: a deployment study and the testing implications", journal = j-TWEB, volume = "3", number = "3", pages = "9:1--9:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541824", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The results of an extensive investigation of cookie deployment amongst 100,000 Internet sites are presented. Cookie deployment is found to be approaching universal levels and hence there exists an associated need for relevant Web and software engineering processes, specifically testing strategies which actively consider cookies. The semi-automated investigation demonstrates that over two-thirds of the sites studied deploy cookies. The investigation specifically examines the use of first-party, third-party, sessional, and persistent cookies within Web-based applications, identifying the presence of a P3P policy and dynamic Web technologies as major predictors of cookie usage. The results are juxtaposed with the lack of testing strategies present in the literature. A number of real-world examples, including two case studies are presented, further accentuating the need for comprehensive testing strategies for Web-based applications. 
The use of antirandom test case generation is explored with respect to the testing issues discussed. Finally, a number of seeding vectors are presented, providing a basis for testing cookies within Web-based applications.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Cookies; Internet browser; software testing; Web engineering; Web technologies", } @Article{Comuzzi:2009:FQB, author = "Marco Comuzzi and Barbara Pernici", title = "A framework for {QoS}-based {Web} service contracting", journal = j-TWEB, volume = "3", number = "3", pages = "10:1--10:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541825", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The extensive adoption of Web service-based applications in dynamic business scenarios, such as on-demand computing or highly reconfigurable virtual enterprises, advocates for methods and tools for the management of Web service nonfunctional aspects, such as Quality of Service (QoS). Concerning contracts on Web service QoS, the literature has mostly focused on the contract definition and on mechanisms for contract enactment, such as the monitoring of the satisfaction of negotiated QoS guarantees. In this context, this article proposes a framework for the automation of the Web service contract specification and establishment. An extensible model for defining both domain-dependent and domain-independent Web service QoS dimensions and a method for the automation of the contract establishment phase are proposed. 
We describe a matchmaking algorithm for the ranking of functionally equivalent services, which orders services on the basis of their ability to fulfill the service requestor requirements, while maintaining the price below a specified budget. We also provide an algorithm for the configuration of the negotiable part of the QoS Service-Level Agreement (SLA), which is used to configure the agreement with the top-ranked service identified in the matchmaking phase. Experimental results show that, in a utility theory perspective, the contract establishment phase leads to efficient outcomes. We envision two advanced application scenarios for the Web service contracting framework proposed in this article. First, it can be used to enhance Web services self-healing properties in reaction to QoS-related service failures; second, it can be exploited in process optimization for the online reconfiguration of candidate Web services QoS SLAs.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "matchmaking; negotiation; QoS; service selection; SLA; Web service", } @Article{Pilioura:2009:UPD, author = "Thomi Pilioura and Aphrodite Tsalgatidou", title = "Unified publication and discovery of semantic {Web} services", journal = j-TWEB, volume = "3", number = "3", pages = "11:1--11:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541826", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The challenge of publishing and discovering Web services has recently received lots of attention. 
Various solutions to this problem have been proposed which, apart from their offered advantages, suffer the following disadvantages: (i) most of them are syntactic-based, leading to poor precision and recall, (ii) they are not scalable to large numbers of services, and (iii) they are incompatible, thus yielding in cumbersome service publication and discovery. This article presents the principles, the functionality, and the design of PYRAMID-S which addresses these disadvantages by providing a scalable framework for unified publication and discovery of semantically enhanced services over heterogeneous registries. PYRAMID-S uses a hybrid peer-to-peer topology to organize Web service registries based on domains. In such a topology, each Registry retains its autonomy, meaning that it can use the publication and discovery mechanisms as well as the ontology of its choice. The viability of this approach is demonstrated through the implementation and experimental analysis of a prototype.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "evaluation; PYRAMID-S; scalable; semantic Web services; unified; Web service discovery; Web service publication", } @Article{Golbeck:2009:TNP, author = "Jennifer Golbeck", title = "Trust and nuanced profile similarity in online social networks", journal = j-TWEB, volume = "3", number = "4", pages = "12:1--12:??", month = sep, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1594173.1594174", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singh:2009:SSO, author = "Aameek Singh and Mudhakar Srivatsa and Ling Liu", 
title = "Search-as-a-service: {Outsourced} search over outsourced storage", journal = j-TWEB, volume = "3", number = "4", pages = "13:1--13:??", month = sep, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1594173.1594175", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Robu:2009:ECS, author = "Valentin Robu and Harry Halpin and Hana Shepherd", title = "Emergence of consensus and shared vocabularies in collaborative tagging systems", journal = j-TWEB, volume = "3", number = "4", pages = "14:1--14:??", month = sep, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1594173.1594176", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zheng:2010:UTM, author = "Yu Zheng and Yukun Chen and Quannan Li and Xing Xie and Wei-Ying Ma", title = "Understanding transportation modes based on {GPS} data for {Web} applications", journal = j-TWEB, volume = "4", number = "1", pages = "1:1--1:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1658373.1658374", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Li:2010:DSO, author = "Guoli Li and Vinod 
Muthusamy and Hans-Arno Jacobsen", title = "A distributed service-oriented architecture for business process execution", journal = j-TWEB, volume = "4", number = "1", pages = "2:1--2:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1658373.1658375", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Montali:2010:DSV, author = "Marco Montali and Maja Pesic and Wil M. P. van der Aalst and Federico Chesani and Paola Mello and Sergio Storari", title = "Declarative specification and verification of service choreographies", journal = j-TWEB, volume = "4", number = "1", pages = "3:1--3:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1658373.1658376", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Almishari:2010:APD, author = "Mishari Almishari and Xiaowei Yang", title = "Ads-portal domains: {Identification} and measurements", journal = j-TWEB, volume = "4", number = "2", pages = "4:1--4:??", month = apr, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1734200.1734201", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "An ads-portal domain refers to a Web domain that shows only advertisements, served by a third-party advertisement syndication service, in the form of ads 
listing. We develop a machine-learning-based classifier to identify ads-portal domains, which has 96\% accuracy. We use this classifier to measure the prevalence of ads-portal domains on the Internet. Surprisingly, 28.3/25\% of the (two-level) {\tt *.com} /{\tt *.net} web domains are ads-portal domains. Also, 41/39.8\% of {\tt *.com} /{\tt *.net} ads-portal domains are typos of well-known domains, also known as typo-squatting domains. In addition, we use the classifier along with DNS trace files to estimate how often Internet users visit ads-portal domains. It turns out that $\approx 5\%$ of the two-level {\tt *.com}, {\tt *.net}, {\tt *.org}, {\tt *.biz} and {\tt *.info} web domains on the traces are ads-portal domains and
$\approx 50\%$ of these accessed ads-portal domains are typos. These numbers show that ads-portal domains and typo-squatting ads-portal domains are prevalent on the Internet and successful in attracting many visits. Our classifier represents a step towards better categorizing the web documents. It can also be helpful to search engines ranking algorithms, helpful in identifying web spams that redirects to ads-portal domains, and used to discourage access to typo-squatting ads-portal domains.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Ads-portal; advertisement syndication; data mining; parked domain; parking service; Web characterization", } @Article{Jurca:2010:RIB, author = "Radu Jurca and Florent Garcin and Arjun Talwar and Boi Faltings", title = "Reporting incentives and biases in online review forums", journal = j-TWEB, volume = "4", number = "2", pages = "5:1--5:??", month = apr, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1734200.1734202", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online reviews have become increasingly popular as a way to judge the quality of various products and services. However, recent work demonstrates that the absence of reporting incentives leads to a biased set of reviews that may not reflect the true quality. In this paper, we investigate underlying factors that influence users when reporting feedback. In particular, we study both reporting incentives and reporting biases observed in a widely used review forum, the Tripadvisor Web site. 
We consider three sources of information: first, the numerical ratings left by the user for different aspects of quality; second, the textual comment accompanying a review; third, the patterns in the time sequence of reports. We first show that groups of users who discuss a certain feature at length are more likely to agree in their ratings. Second, we show that users are more motivated to give feedback when they perceive a greater risk involved in a transaction. Third, a user's rating partly reflects the difference between true quality and prior expectation of quality, as inferred from previous reviews. We finally observe that because of these biases, when averaging review scores there are strong differences between the mean and the median. We speculate that the median may be a better way to summarize the ratings.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Online reviews; reputation mechanisms", } @Article{Vlachos:2010:ODB, author = "Michail Vlachos and Suleyman S. Kozat and Philip S. Yu", title = "Optimal distance bounds for fast search on compressed time-series query logs", journal = j-TWEB, volume = "4", number = "2", pages = "6:1--6:??", month = apr, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1734200.1734203", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Consider a database of time-series, where each datapoint in the series records the total number of users who asked for a specific query at an Internet search engine. Storage and analysis of such logs can be very beneficial for a search company from multiple perspectives. 
First, from a data organization perspective, because query Weblogs capture important trends and statistics, they can help enhance and optimize the search experience (keyword recommendation, discovery of news events). Second, Weblog data can provide an important polling mechanism for the microeconomic aspects of a search engine, since they can facilitate and promote the advertising facet of the search engine (understand what users request and when they request it).\par Due to the sheer amount of time-series Weblogs, manipulation of the logs in a compressed form is an impeding necessity for fast data processing and compact storage requirements. Here, we explicate how to compute the lower and upper distance bounds on the time-series logs when working directly on their compressed form. Optimal distance estimation means tighter bounds, leading to better candidate selection/elimination and ultimately faster search performance. Our derivation of the optimal distance bounds is based on the careful analysis of the problem using optimization principles. The experimental evaluation suggests a clear performance advantage of the proposed method, compared to previous compression/search techniques. 
The presented method results in a 10--30\% improvement on distance estimations, which in turn leads to 25--80\% improvement on the search performance.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fraternali:2010:ERI, author = "Piero Fraternali and Sara Comai and Alessandro Bozzon and Giovanni Toffetti Carughi", title = "Engineering rich {Internet} applications with a model-driven approach", journal = j-TWEB, volume = "4", number = "2", pages = "7:1--7:??", month = apr, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1734200.1734204", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Rich Internet Applications (RIAs) have introduced powerful novel functionalities into the Web architecture, borrowed from client-server and desktop applications. The resulting platforms allow designers to improve the user's experience, by exploiting client-side data and computation, bidirectional client-server communication, synchronous and asynchronous events, and rich interface widgets. However, the rapid evolution of RIA technologies challenges the Model-Driven Development methodologies that have been successfully applied in the past decade to traditional Web solutions. This paper illustrates an evolutionary approach for incorporating a wealth of RIA features into an existing Web engineering methodology and notation. The experience demonstrates that it is possible to model RIA application requirements at a high-level using a platform-independent notation, and generate the client-side and server-side code automatically. 
The resulting approach is evaluated in terms of expressive power, ease of use, and implementability.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "information interfaces and presentation; information storage and retrieval; model-driven development; Rich Internet applications; Web engineering", } @Article{Xiao:2010:LSS, author = "Xiangye Xiao and Qiong Luo and Zhisheng Li and Xing Xie and Wei-Ying Ma", title = "A large-scale study on map search logs", journal = j-TWEB, volume = "4", number = "3", pages = "8:1--8:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1806916.1806917", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Map search engines, such as Google Maps, Yahoo! Maps, and Microsoft Live Maps, allow users to explicitly specify a target geographic location, either in keywords or on the map, and to search businesses, people, and other information of that location. In this article, we report a first study on a million-entry map search log. We identify three key attributes of a map search record --- the keyword query, the target location and the user location, and examine the characteristics of these three dimensions separately as well as the associations between them. 
Comparing our results with those previously reported on logs of general search engines and mobile search engines, including those for geographic queries, we discover the following unique features of map search: (1) People use longer queries and modify queries more frequently in a session than in general search and mobile search; People view fewer result pages per query than in general search; (2) The popular query topics in map search are different from those in general search and mobile search; (3) The target locations in a session change within 50 kilometers for almost 80\% of the sessions; (4) Queries, search target locations and user locations (both at the city level) all follow the power law distribution; (5) One third of queries are issued for target locations within 50 kilometers from the user locations; (6) The distribution of a query over target locations appears to follow the geographic location of the queried entity.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "local search; log analysis; Map search; query categorization; search interface; user behavior", } @Article{Malak:2010:MWQ, author = "Ghazwa Malak and Houari Sahraoui and Linda Badri and Mourad Badri", title = "Modeling {Web} quality using a probabilistic approach: an empirical validation", journal = j-TWEB, volume = "4", number = "3", pages = "9:1--9:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1806916.1806918", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web-based applications are software systems that continuously evolve to meet users' needs and to adapt to new technologies. Assuring their quality is then a difficult, but essential task. 
In fact, a large number of factors can affect their quality. Considering these factors and their interaction involves managing uncertainty and subjectivity inherent to this kind of applications. In this article, we present a probabilistic approach for building Web quality models and the associated assessment method. The proposed approach is based on Bayesian Networks. A model is built following a four-step process consisting in collecting quality characteristics, refining them, building a model structure, and deriving the model parameters.\par The feasibility of the approach is illustrated on the important quality characteristic of {\em Navigability design}. To validate the produced model, we conducted an experimental study with 20 subjects and 40 web pages. The results obtained show that the scores given by the used model are strongly correlated with navigability as perceived and experienced by the users.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Bayesian networks; Navigability design; probabilistic approach; quality evaluation; Web applications", } @Article{Poblete:2010:PPQ, author = "Barbara Poblete and Myra Spiliopoulou and Ricardo Baeza-Yates", title = "Privacy-preserving query log mining for business confidentiality protection", journal = j-TWEB, volume = "4", number = "3", pages = "10:1--10:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1806916.1806919", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We introduce the concern of confidentiality protection of business information for the publication of search engine query logs and derived data. 
We study business confidentiality, as the protection of nonpublic data from institutions, such as companies and people in the public eye. In particular, we relate this concern to the involuntary exposure of confidential Web site information, and we transfer this problem into the field of privacy-preserving data mining. We characterize the possible adversaries interested in disclosing Web site confidential data and the attack strategies that they could use. These attacks are based on different vulnerabilities found in query log for which we present several anonymization heuristics to prevent them. We perform an experimental evaluation to estimate the remaining utility of the log after the application of our anonymization techniques. Our experimental results show that a query log can be anonymized against these specific attacks while retaining a significant volume of useful data.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Privacy preservation; queries; query log publication; Web sites", } @Article{Consens:2010:EXW, author = "Mariano P. Consens and Ren{\'e}e J. Miller and Flavio Rizzolo and Alejandro A. Vaisman", title = "Exploring {XML} {Web} collections with {DescribeX}", journal = j-TWEB, volume = "4", number = "3", pages = "11:1--11:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1806916.1806920", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As Web applications mature and evolve, the nature of the semistructured data that drives these applications also changes. An important trend is the need for increased flexibility in the structure of Web documents. 
Hence, applications cannot rely solely on schemas to provide the complex knowledge needed to visualize, use, query and manage documents. Even when XML Web documents are valid with regard to a schema, the actual structure of such documents may exhibit significant variations across collections for several reasons: the schema may be very lax (e.g., RSS feeds), the schema may be large and different subsets of it may be used in different documents (e.g., industry standards like UBL), or open content models may allow arbitrary schemas to be mixed (e.g., RSS extensions like those used for podcasting). For these reasons, many applications that incorporate XPath queries to process a large Web document collection require an understanding of the actual structure present in the collection, and not just the schema.\par To support modern Web applications, we introduce DescribeX, a powerful framework that is capable of describing complex XML summaries of Web collections. DescribeX supports the construction of heterogeneous summaries that can be declaratively defined and refined by means of axis path regular expression (AxPREs). AxPREs provide the flexibility necessary for declaratively defining complex mappings between instance nodes (in the documents) and summary nodes. These mappings are capable of expressing order and cardinality, among other properties, which can significantly help in the understanding of the structure of large collections of XML documents and enhance the performance of Web applications over these collections. DescribeX captures most summary proposals in the literature by providing (for the first time) a common declarative definition for them. 
Experimental results demonstrate the scalability of DescribeX summary operations (summary creation, as well as refinement and stabilization, two key enablers for tailoring summaries) on multi-gigabyte Web collections.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Semistructured data; structural summaries; XML; XPath", } @Article{Adams:2010:DLS, author = "Brett Adams and Dinh Phung and Svetha Venkatesh", title = "Discovery of latent subcommunities in a blog's readership", journal = j-TWEB, volume = "4", number = "3", pages = "12:1--12:??", month = jul, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1806916.1806921", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The blogosphere has grown to be a mainstream forum of social interaction as well as a commercially attractive source of information and influence. Tools are needed to better understand how communities that adhere to individual blogs are constituted in order to facilitate new personal, socially-focused browsing paradigms, and understand how blog content is consumed, which is of interest to blog authors, big media, and search. We present a novel approach to blog subcommunity characterization by modeling individual blog readers using mixtures of an extension to the LDA family that jointly models phrases and time, Ngram Topic over Time (NTOT), and cluster with a number of similarity measures using Affinity Propagation. We experiment with two datasets: a small set of blogs whose authors provide feedback, and a set of popular, highly commented blogs, which provide indicators of algorithm scalability and interpretability without prior knowledge of a given blog. 
The results offer useful insight to the blog authors about their commenting community, and are observed to offer an integrated perspective on the topics of discussion and members engaged in those discussions for unfamiliar blogs. Our approach also holds promise as a component of solutions to related problems, such as online entity resolution and role discovery.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "affinity propagation; Blog; topic models; Web communities", } @Article{Kiciman:2010:APR, author = "Emre Kiciman and Benjamin Livshits", title = "{AjaxScope}: a Platform for Remotely Monitoring the Client-Side Behavior of {Web 2.0} Applications", journal = j-TWEB, volume = "4", number = "4", pages = "13:1--13:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1841909.1841910", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bex:2010:LDR, author = "Geert Jan Bex and Wouter Gelade and Frank Neven and Stijn Vansummeren", title = "Learning Deterministic Regular Expressions for the Inference of Schemas from {XML} Data", journal = j-TWEB, volume = "4", number = "4", pages = "14:1--14:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1841909.1841911", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } 
@Article{Bailey:2010:MHQ, author = "Peter Bailey and Ryen W. White and Han Liu and Giridhar Kumaran", title = "Mining Historic Query Trails to Label Long and Rare Search Engine Queries", journal = j-TWEB, volume = "4", number = "4", pages = "15:1--15:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1841909.1841912", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Claude:2010:FCW, author = "Francisco Claude and Gonzalo Navarro", title = "Fast and Compact {Web} Graph Representations", journal = j-TWEB, volume = "4", number = "4", pages = "16:1--16:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1841909.1841913", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Swaminathan:2010:RRM, author = "Ashwin Swaminathan and Renan G. Cattelan and Ydo Wexler and Cherian V. 
Mathew and Darko Kirovski", title = "Relating Reputation and Money in Online Markets", journal = j-TWEB, volume = "4", number = "4", pages = "17:1--17:??", month = sep, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1841909.1841914", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Riedl:2011:ISI, author = "John Riedl and Barry Smyth", title = "Introduction to special issue on recommender systems", journal = j-TWEB, volume = "5", number = "1", pages = "1:1--1:??", month = feb, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1921591.1921592", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Mon Mar 28 11:56:06 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cacheda:2011:CCF, author = "Fidel Cacheda and V{\'\i}ctor Carneiro and Diego Fern{\'a}ndez and Vreixo Formoso", title = "Comparison of collaborative filtering algorithms: Limitations of current techniques and proposals for scalable, high-performance recommender systems", journal = j-TWEB, volume = "5", number = "1", pages = "2:1--2:??", month = feb, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1921591.1921593", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Mon Mar 28 11:56:06 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } 
@Article{Umyarov:2011:UEA, author = "Akhmed Umyarov and Alexander Tuzhilin", title = "Using external aggregate ratings for improving individual recommendations", journal = j-TWEB, volume = "5", number = "1", pages = "3:1--3:??", month = feb, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1921591.1921594", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Mon Mar 28 11:56:06 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Song:2011:ATR, author = "Yang Song and Lu Zhang and C. Lee Giles", title = "Automatic tag recommendation algorithms for social recommender systems", journal = j-TWEB, volume = "5", number = "1", pages = "4:1--4:??", month = feb, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1921591.1921595", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Mon Mar 28 11:56:06 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zheng:2011:RFL, author = "Yu Zheng and Lizhu Zhang and Zhengxin Ma and Xing Xie and Wei-Ying Ma", title = "Recommending friends and locations based on individual location history", journal = j-TWEB, volume = "5", number = "1", pages = "5:1--5:??", month = feb, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1921591.1921596", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Mon Mar 28 11:56:06 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2011:TDQ, author = "Mingfang Wu and Falk Scholer and Andrew Turpin", title = "Topic Distillation with Query-Dependent Link Connections and Page Characteristics", journal = j-TWEB, volume = "5", number = "2", pages = "6:1--6:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961660", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hurley:2011:HBP, author = "John Hurley and Emi Garcia-Palacios and Sakir Sezer", title = "Host-Based {P2P} Flow Identification and Use in Real-Time", journal = j-TWEB, volume = "5", number = "2", pages = "7:1--7:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961661", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mitra:2011:CWB, author = "Siddharth Mitra and Mayank Agrawal and Amit Yadav and Niklas Carlsson and Derek Eager and Anirban Mahanti", title = "Characterizing {Web}-Based Video Sharing Workloads", journal = j-TWEB, volume = "5", number = "2", pages = "8:1--8:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961662", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on 
the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ozcan:2011:CAS, author = "Rifat Ozcan and Ismail Sengor Altingovde and {\"O}zg{\"u}r Ulusoy", title = "Cost-Aware Strategies for Query Result Caching in {Web} Search Engines", journal = j-TWEB, volume = "5", number = "2", pages = "9:1--9:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961663", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Valderas:2011:SRS, author = "Pedro Valderas and Vicente Pelechano", title = "A Survey of Requirements Specification in Model-Driven Development of {Web} Applications", journal = j-TWEB, volume = "5", number = "2", pages = "10:1--10:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961664", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Grier:2011:DIO, author = "Chris Grier and Shuo Tang and Samuel T. 
King", title = "Designing and Implementing the {OP} and {OP2} {Web} Browsers", journal = j-TWEB, volume = "5", number = "2", pages = "11:1--11:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961665", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Koutsonikola:2011:CDL, author = "Vassiliki Koutsonikola and Athena Vakali", title = "A Clustering-Driven {LDAP} Framework", journal = j-TWEB, volume = "5", number = "3", pages = "12:1--12:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993054", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Paci:2011:AAC, author = "Federica Paci and Massimo Mecella and Mourad Ouzzani and Elisa Bertino", title = "{ACConv} -- An Access Control Model for Conversational {Web} Services", journal = j-TWEB, volume = "5", number = "3", pages = "13:1--13:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993055", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zeginis:2011:CDR, author = "Dimitris Zeginis and Yannis Tzitzikas and Vassilis 
Christophides", title = "On Computing Deltas of {RDF/S} Knowledge Bases", journal = j-TWEB, volume = "5", number = "3", pages = "14:1--14:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993056", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Baykan:2011:CSF, author = "Eda Baykan and Monika Henzinger and Ludmila Marian and Ingmar Weber", title = "A Comprehensive Study of Features and Algorithms for {URL}-Based Topic Classification", journal = j-TWEB, volume = "5", number = "3", pages = "15:1--15:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993057", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tuchinda:2011:BMD, author = "Rattapoom Tuchinda and Craig A. Knoblock and Pedro Szekely", title = "Building Mashups by Demonstration", journal = j-TWEB, volume = "5", number = "3", pages = "16:1--16:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993058", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alzoubi:2011:PAA, author = "Hussein A. 
Alzoubi and Seungjoon Lee and Michael Rabinovich and Oliver Spatscheck and Jacobus {Van Der Merwe}", title = "A Practical Architecture for an {Anycast CDN}", journal = j-TWEB, volume = "5", number = "4", pages = "17:1--17:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019644", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "IP Anycast has many attractive features for any service that involve the replication of multiple instances across the Internet. IP Anycast allows multiple instances of the same service to be `naturally' discovered, and requests for this service to be delivered to the closest instance. However, while briefly considered as an enabler for content delivery networks (CDNs) when they first emerged, IP Anycast was deemed infeasible in that environment. The main reasons for this decision were the lack of load awareness of IP Anycast and unwanted side effects of Internet routing changes on the IP Anycast mechanism. In this article we re-evaluate IP Anycast for CDNs by proposing a load-aware IP Anycast CDN architecture. Our architecture is prompted by recent developments in route control technology, as well as better understanding of the behavior of IP Anycast in operational settings. Our architecture makes use of route control mechanisms to take server and network load into account to realize load-aware Anycast. We show that the resulting redirection requirements can be formulated as a Generalized Assignment Problem and present practical algorithms that address these requirements while at the same time limiting connection disruptions that plague regular IP Anycast. 
We evaluate our algorithms through trace based simulation using traces obtained from a production CDN network.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bar-Yossef:2011:ESE, author = "Ziv Bar-Yossef and Maxim Gurevich", title = "Efficient Search Engine Measurements", journal = j-TWEB, volume = "5", number = "4", pages = "18:1--18:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019645", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We address the problem of externally measuring aggregate functions over documents indexed by search engines, like corpus size, index freshness, and density of duplicates in the corpus. State of the art estimators for such quantities [Bar-Yossef and Gurevich 2008b; Broder et al. 2006] are biased due to inaccurate approximation of the so called `document degrees'. In addition, the estimators in Bar-Yossef and Gurevich [2008b] are quite costly, due to their reliance on rejection sampling. We present new estimators that are able to overcome the bias introduced by approximate degrees. Our estimators are based on a careful implementation of an approximate importance sampling procedure. Comprehensive theoretical and empirical analysis of the estimators demonstrates that they have essentially no bias even in situations where document degrees are poorly approximated. By avoiding the costly rejection sampling approach, our new importance sampling estimators are significantly more efficient than the estimators proposed in Bar-Yossef and Gurevich [2008b]. Furthermore, building on an idea from Broder et al. [2006], we discuss Rao-Blackwellization as a generic method for reducing variance in search engine estimators. 
We show that Rao-Blackwellizing our estimators results in performance improvements, without compromising accuracy.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gill:2011:COU, author = "Phillipa Gill and Martin Arlitt and Niklas Carlsson and Anirban Mahanti and Carey Williamson", title = "Characterizing Organizational Use of {Web}-Based Services: Methodology, Challenges, Observations, and Insights", journal = j-TWEB, volume = "5", number = "4", pages = "19:1--19:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Today's Web provides many different functionalities, including communication, entertainment, social networking, and information retrieval. In this article, we analyze traces of HTTP activity from a large enterprise and from a large university to identify and characterize Web-based service usage. Our work provides an initial methodology for the analysis of Web-based services. While it is nontrivial to identify the classes, instances, and providers for each transaction, our results show that most of the traffic comes from a small subset of providers, which can be classified manually. 
Furthermore, we assess both qualitatively and quantitatively how the Web has evolved over the past decade, and discuss the implications of these changes.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singla:2011:CBC, author = "Adish Singla and Ingmar Weber", title = "Camera Brand Congruence and Camera Model Propagation in the {Flickr} Social Graph", journal = j-TWEB, volume = "5", number = "4", pages = "20:1--20:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019647", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given that my friends on Flickr use cameras of brand X, am I more likely to also use a camera of brand X? Given that one of these friends changes her brand, am I likely to do the same? Do new camera models pop up uniformly in the friendship graph? Or do early adopters then `convert' their friends? Which factors influence the conversion probability of a user? These are the kind of questions addressed in this work. Direct applications involve personalized advertising in social networks. For our study, we crawled a complete connected component of the Flickr friendship graph with a total of 67M edges and 3.9M users. 1.2M of these users had at least one public photograph with valid model metadata, which allowed us to assign camera brands and models to users and time slots. Similarly, we used, where provided in a user's profile, information about a user's geographic location and the groups joined on Flickr. Concerning brand congruence, our main findings are the following. First, a pair of friends on Flickr has a higher probability of being congruent, that is, using the same brand, compared to two random users (27\% vs. 19\%). 
Second, the degree of congruence goes up for pairs of friends (i) in the same country (29\%), (ii) who both only have very few friends (30\%), and (iii) with a very high cliqueness (38\%). Third, given that a user changes her camera model between March-May 2007 and March-May 2008, high cliqueness friends are more likely than random users to do the same (54\% vs. 48\%). Fourth, users using high-end cameras are far more loyal to their brand than users using point-and-shoot cameras, with a probability of staying with the same brand of 60\% vs 33\%, given that a new camera is bought. Fifth, these `expert' users' brand congruence reaches 66\% for high cliqueness friends. All these differences are statistically significant at 1\%. As for the propagation of new models in the friendship graph, we observe the following. First, the growth of connected components of users converted to a particular, new camera model differs distinctly from random growth. Second, the decline of dissemination of a particular model is close to random decline. This illustrates that users influence their friends to change to a particular new model, rather than from a particular old model. Third, having many converted friends increases the probability of the user to convert herself. Here differences between friends from the same or from different countries are more pronounced for point-and-shoot than for digital single-lens reflex users. Fourth, there was again a distinct difference between arbitrary friends and high cliqueness friends in terms of prediction quality for conversion.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Curlango-Rosas:2011:SSA, author = "Cecilia Curlango-Rosas and Gregorio A. Ponce and Gabriel A. 
Lopez-Morteo", title = "A Specialized Search Assistant for Learning Objects", journal = j-TWEB, volume = "5", number = "4", pages = "21:1--21:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019648", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web holds a great quantity of material that can be used to enhance classroom instruction. However, it is not easy to retrieve this material with the search engines currently available. This study produced a specialized search assistant based on Google that significantly increases the number of instances in which teachers find the desired learning objects as compared to using this popular public search engine directly. Success in finding learning objects by study participants went from 80\% using Google alone to 96\% when using our search assistant in one scenario and, in another scenario, from a 40\% success rate with Google alone to 66\% with our assistant. This specialized search assistant implements features such as bilingual search and term suggestion which were requested by teacher participants to help improve their searches. 
Study participants evaluated the specialized search assistant and found it significantly easier to use and more useful than the popular search engine for the purpose of finding learning objects.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhu:2012:CLS, author = "Guangyu Zhu and Gilad Mishne", title = "{ClickRank}: Learning Session-Context Models to Enrich {Web} Search Ranking", journal = j-TWEB, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109206", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User browsing information, particularly non-search-related activity, reveals important contextual information on the preferences and intents of Web users. In this article, we demonstrate the importance of mining general Web user behavior data to improve ranking and other Web-search experience, with an emphasis on analyzing individual user sessions for creating aggregate models. In this context, we introduce ClickRank, an efficient, scalable algorithm for estimating Webpage and Website importance from general Web user-behavior data. We lay out the theoretical foundation of ClickRank based on an intentional surfer model and discuss its properties. We quantitatively evaluate its effectiveness regarding the problem of Web-search ranking, showing that it contributes significantly to retrieval performance as a novel Web-search feature. We demonstrate that the results produced by ClickRank for Web-search ranking are highly competitive with those produced by other approaches, yet achieved at better scalability and substantially lower computational costs. 
Finally, we discuss novel applications of ClickRank in providing enriched user Web-search experience, highlighting the usefulness of our approach for nonranking tasks.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2012:IWS, author = "Yiqun Liu and Fei Chen and Weize Kong and Huijia Yu and Min Zhang and Shaoping Ma and Liyun Ru", title = "Identifying {Web} Spam with the Wisdom of the Crowds", journal = j-TWEB, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109207", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. 
Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mesbah:2012:CAB, author = "Ali Mesbah and Arie van Deursen and Stefan Lenselink", title = "Crawling {Ajax}-Based {Web} Applications through Dynamic Analysis of User Interface State Changes", journal = j-TWEB, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109208", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Using JavaScript and dynamic DOM manipulation on the client side of Web applications is becoming a widespread approach for achieving rich interactivity and responsiveness in modern Web applications. At the same time, such techniques---collectively known as Ajax---shatter the concept of webpages with unique URLs, on which traditional Web crawlers are based. This article describes a novel technique for crawling Ajax-based applications through automatic dynamic analysis of user-interface-state changes in Web browsers. Our algorithm scans the DOM tree, spots candidate elements that are capable of changing the state, fires events on those candidate elements, and incrementally infers a state machine that models the various navigational paths and states within an Ajax application. This inferred model can be used in program comprehension and in analysis and testing of dynamic Web states, for instance, or for generating a static version of the application. In this article, we discuss our sequential and concurrent Ajax crawling algorithms. 
We present our open source tool called Crawljax, which implements the concepts and algorithms discussed in this article. Additionally, we report a number of empirical studies in which we apply our approach to a number of open-source and industrial Web applications and elaborate on the obtained results.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lauw:2012:QLO, author = "Hady W. Lauw and Ee-Peng Lim and Ke Wang", title = "Quality and Leniency in Online Collaborative Rating Systems", journal = j-TWEB, volume = "6", number = "1", pages = "4:1--4:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109209", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The emerging trend of social information processing has resulted in Web users' increased reliance on user-generated content contributed by others for information searching and decision making. Rating scores, a form of user-generated content contributed by reviewers in online rating systems, allow users to leverage others' opinions in the evaluation of objects. In this article, we focus on the problem of summarizing the rating scores given to an object into an overall score that reflects the object's quality. We observe that the existing approaches for summarizing scores largely ignores the effect of reviewers exercising different standards in assigning scores. Instead of treating all reviewers as equals, our approach models the leniency of reviewers, which refers to the tendency of a reviewer to assign higher scores than other coreviewers. 
Our approach is underlined by two insights: (1) The leniency of a reviewer depends not only on how the reviewer rates objects, but also on how other reviewers rate those objects and (2) The leniency of a reviewer and the quality of rated objects are mutually dependent. We develop the leniency-aware quality, or LQ model, which solves leniency and quality simultaneously. We introduce both an exact and a ranked solution to the model. Experiments on real-life and synthetic datasets show that LQ is more effective than comparable approaches. LQ is also shown to perform consistently better under different parameter settings.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ashman:2012:E, author = "Helen Ashman and Arun Iyengar and Marc Najork", title = "Editorial", journal = j-TWEB, volume = "6", number = "2", pages = "5:1--5:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180862", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{DeCapitaniDiVimercati:2012:ITM, author = "Sabrina {De Capitani Di Vimercati} and Sara Foresti and Sushil Jajodia and Stefano Paraboschi and Giuseppe Psaila and Pierangela Samarati", title = "Integrating trust management and access control in data-intensive {Web} applications", journal = j-TWEB, volume = "6", number = "2", pages = "6:1--6:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180863", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = 
"http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The widespread diffusion of Web-based services provided by public and private organizations emphasizes the need for a flexible solution for protecting the information accessible through Web applications. A promising approach is represented by credential-based access control and trust management. However, although much research has been done and several proposals exist, a clear obstacle to the realization of their benefits in data-intensive Web applications is represented by the lack of adequate support in the DBMSs. As a matter of fact, DBMSs are often responsible for the management of most of the information that is accessed using a Web browser or a Web service invocation. In this article, we aim at eliminating this gap, and present an approach integrating trust management with the access control of the DBMS. We propose a trust model with a SQL syntax and illustrate an algorithm for the efficient verification of a delegation path for certificates. Our solution nicely complements current trust management proposals allowing the efficient realization of the services of an advanced trust management model within current relational DBMSs. An important benefit of our approach lies in its potential for a robust end-to-end design of security for personal data in Web scenario, where vulnerabilities of Web applications cannot be used to violate the protection of the data residing on the database server. 
We also illustrate the implementation of our approach within an open-source DBMS discussing design choices and performance impact.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alrifai:2012:HAE, author = "Mohammad Alrifai and Thomas Risse and Wolfgang Nejdl", title = "A hybrid approach for efficient {Web} service composition with end-to-end {QoS} constraints", journal = j-TWEB, volume = "6", number = "2", pages = "7:1--7:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180864", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Dynamic selection of Web services at runtime is important for building flexible and loosely-coupled service-oriented applications. An abstract description of the required services is provided at design-time, and matching service offers are located at runtime. With the growing number of Web services that provide the same functionality but differ in quality parameters (e.g., availability, response time), a decision needs to be made on which services should be selected such that the user's end-to-end QoS requirements are satisfied. Although very efficient, local selection strategy fails short in handling global QoS requirements. Solutions based on global optimization, on the other hand, can handle global constraints, but their poor performance renders them inappropriate for applications with dynamic and realtime requirements. In this article we address this problem and propose a hybrid solution that combines global optimization with local selection techniques to benefit from the advantages of both worlds. 
The proposed solution consists of two steps: first, we use mixed integer programming (MIP) to find the optimal decomposition of global QoS constraints into local constraints. Second, we use distributed local selection to find the best Web services that satisfy these local constraints. The results of experimental evaluation indicate that our approach significantly outperforms existing solutions in terms of computation time while achieving close-to-optimal results.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Desnoyers:2012:MAM, author = "Peter Desnoyers and Timothy Wood and Prashant Shenoy and Rahul Singh and Sangameshwar Patil and Harrick Vin", title = "{Modellus}: Automated modeling of complex {Internet} data center applications", journal = j-TWEB, volume = "6", number = "2", pages = "8:1--8:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180865", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The rising complexity of distributed server applications in Internet data centers has made the tasks of modeling and analyzing their behavior increasingly difficult. This article presents Modellus, a novel system for automated modeling of complex web-based data center applications using methods from queuing theory, data mining, and machine learning. Modellus uses queuing theory and statistical methods to automatically derive models to predict the resource usage of an application and the workload it triggers; these models can be composed to capture multiple dependencies between interacting applications. 
Model accuracy is maintained by fast, distributed testing, automated relearning of models when they change, and methods to bound prediction errors in composite models. We have implemented a prototype of Modellus, deployed it on a data center testbed, and evaluated its efficacy for modeling and analysis of several distributed multitier web applications. Our results show that this feature-based modeling technique is able to make predictions across several data center tiers, and maintain predictive accuracy (typically 95\% or better) in the face of significant shifts in workload composition; we also demonstrate practical applications of the Modellus system to prediction and provisioning of real-world data center applications.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Aiello:2012:FPH, author = "Luca Maria Aiello and Alain Barrat and Rossano Schifanella and Ciro Cattuto and Benjamin Markines and Filippo Menczer", title = "Friendship prediction and homophily in social media", journal = j-TWEB, volume = "6", number = "2", pages = "9:1--9:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180866", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social media have attracted considerable attention because their open-ended nature allows users to create lightweight semantic scaffolding to organize and share content. To date, the interplay of the social and topical components of social media has been only partially explored. Here, we study the presence of homophily in three systems that combine tagging social media with online social networks. We find a substantial level of topical similarity among users who are close to each other in the social network. 
We introduce a null model that preserves user activity while removing local correlations, allowing us to disentangle the actual local similarity between users from statistical effects due to the assortative mixing of user activity and centrality in the social network. This analysis suggests that users with similar interests are more likely to be friends, and therefore topical similarity measures among users based solely on their annotation metadata should be predictive of social links. We test this hypothesis on several datasets, confirming that social networks constructed from topical similarity capture actual friendship accurately. When combined with topological features, topical similarity achieves a link prediction accuracy of about 92\%.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Comai:2012:MDM, author = "Sara Comai and Davide Mazza", title = "A model-driven methodology to the content layout problem in {Web} applications", journal = j-TWEB, volume = "6", number = "3", pages = "10:1--10:38", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344417", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/texbook3.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article presents a model-driven approach for the design of the layout in a complex Web application, where large amounts of data are accessed. The aim of this work is to reduce, as much as possible, repetitive tasks and to factor out common aspects into different kinds of rules that can be reused across different applications. 
In particular, exploiting the conceptual elements of the typical models used for the design of a Web application, it defines presentation and layout rules at different levels of abstraction and granularity. A procedure for the automatic layout of the content of a page is proposed and evaluated, and the layout of advanced Web applications is discussed.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Automatic contents layout; graphical visualization and rendering; Web applications design", } @Article{Merhav:2012:EIN, author = "Yuval Merhav and Filipe Mesquita and Denilson Barbosa and Wai Gen Yee and Ophir Frieder", title = "Extracting information networks from the blogosphere", journal = j-TWEB, volume = "6", number = "3", pages = "11:1--11:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344418", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We study the problem of automatically extracting information networks formed by recognizable entities as well as relations among them from social media sites. Our approach consists of using state-of-the-art natural language processing tools to identify entities and extract sentences that relate such entities, followed by using text-clustering algorithms to identify the relations within the information network. We propose a new term-weighting scheme that significantly improves on the state-of-the-art in the task of relation extraction, both when used in conjunction with the standard tf$ \cdot $idf scheme and also when used as a pruning filter. 
We describe an effective method for identifying benchmarks for open information extraction that relies on a curated online database that is comparable to the hand-crafted evaluation datasets in the literature. From this benchmark, we derive a much larger dataset which mimics realistic conditions for the task of open information extraction. We report on extensive experiments on both datasets, which not only shed light on the accuracy levels achieved by state-of-the-art open information extraction tools, but also on how to tune such tools for better results.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Miliaraki:2012:FDS, author = "Iris Miliaraki and Manolis Koubarakis", title = "{FoXtrot}: Distributed structural and value {XML} filtering", journal = j-TWEB, volume = "6", number = "3", pages = "12:1--12:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344419", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Publish/subscribe systems have emerged in recent years as a promising paradigm for offering various popular notification services. In this context, many XML filtering systems have been proposed to efficiently identify XML data that matches user interests expressed as queries in an XML query language like XPath. However, in order to offer XML filtering functionality on an Internet-scale, we need to deploy such a service in a distributed environment, avoiding bottlenecks that can deteriorate performance. In this work, we design and implement FoXtrot, a system for filtering XML data that combines the strengths of automata for efficient filtering and distributed hash tables for building a fully distributed system. 
Apart from structural-matching, performed using automata, we also discuss different methods for evaluating value-based predicates. We perform an extensive experimental evaluation of our system, FoXtrot, on a local cluster and on the PlanetLab network and demonstrate that it can index millions of user queries, achieving a high indexing and filtering throughput. At the same time, FoXtrot exhibits very good load-balancing properties and improves its performance as we increase the size of the network.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dork:2012:NTW, author = "Marian D{\"o}rk and Carey Williamson and Sheelagh Carpendale", title = "Navigating tomorrow's web: From searching and browsing to visual exploration", journal = j-TWEB, volume = "6", number = "3", pages = "13:1--13:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344420", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a new way of navigating the Web using interactive information visualizations, and present encouraging results from a large-scale Web study of a visual exploration system. While the Web has become an immense, diverse information space, it has also evolved into a powerful software platform. We believe that the established interaction techniques of searching and browsing do not sufficiently utilize these advances, since information seekers have to transform their information needs into specific, text-based search queries resulting in mostly text-based lists of resources. 
In contrast, we foresee a new type of information seeking that is high-level and more engaging, by providing the information seeker with interactive visualizations that give graphical overviews and enable query formulation. Building on recent work on faceted navigation, information visualization, and exploratory search, we conceptualize this type of information navigation as visual exploration and evaluate a prototype Web-based system that implements it. We discuss the results of a large-scale, mixed-method Web study that provides a better understanding of the potential benefits of visual exploration on the Web, and its particular performance challenges.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2012:CBQ, author = "B. Barla Cambazoglu and Ismail Sengor Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy", title = "Cache-Based Query Processing for Search Engines", journal = j-TWEB, volume = "6", number = "4", pages = "14:1--14:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382617", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In practice, a search engine may fail to serve a query due to various reasons such as hardware/network failures, excessive query load, lack of matching documents, or service contract limitations (e.g., the query rate limits for third-party users of a search service). In this kind of scenarios, where the backend search system is unable to generate answers to queries, approximate answers can be generated by exploiting the previously computed query results available in the result cache of the search engine. 
In this work, we propose two alternative strategies to implement this cache-based query processing idea. The first strategy aggregates the results of similar queries that are previously cached in order to create synthetic results for new queries. The second strategy forms an inverted index over the textual information (i.e., query terms and result snippets) present in the result cache and uses this index to answer new queries. Both approaches achieve reasonable result qualities compared to processing queries with an inverted index built on the collection.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Delac:2012:MSS, author = "Goran Delac and Ivan Budiselic and Ivan Zuzak and Ivan Skuliber and Tomislav Stefanec", title = "A Methodology for {SIP} and {SOAP} Integration Using Application-Specific Protocol Conversion", journal = j-TWEB, volume = "6", number = "4", pages = "15:1--15:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382618", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent years, the ubiquitous demands for cross-protocol application access are driving the need for deeper integration between SIP and SOAP. In this article we present a novel methodology for integrating these two protocols. Through an analysis of properties of SIP and SOAP we show that integration between these protocols should be based on application-specific converters. We describe a generic SIP/SOAP gateway that implements message handling and network and storage management while relying on application-specific converters to define session management and message mapping for a specific set of SIP and SOAP communication nodes. 
In order to ease development of these converters, we introduce an XML-based domain-specific language for describing application-specific conversion processes. We show how conversion processes can be easily specified in the language using message sequence diagrams of the desired interaction. We evaluate the presented methodology through performance analysis of the developed prototype gateway and high-level comparison with other solutions.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jeon:2012:WCP, author = "Myeongjae Jeon and Youngjae Kim and Jeaho Hwang and Joonwon Lee and Euiseong Seo", title = "Workload Characterization and Performance Implications of Large-Scale Blog Servers", journal = j-TWEB, volume = "6", number = "4", pages = "16:1--16:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382619", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the ever-increasing popularity of Social Network Services (SNSs), an understanding of the characteristics of these services and their effects on the behavior of their host servers is critical. However, there has been a lack of research on the workload characterization of servers running SNS applications such as blog services. To fill this void, we empirically characterized real-world Web server logs collected from one of the largest South Korean blog hosting sites for 12 consecutive days. The logs consist of more than 96 million HTTP requests and 4.7TB of network traffic. 
Our analysis reveals the following: (i) The transfer size of nonmultimedia files and blog articles can be modeled using a truncated Pareto distribution and a log-normal distribution, respectively; (ii) user access for blog articles does not show temporal locality, but is strongly biased towards those posted with image or audio files. We additionally discuss the potential performance improvement through clustering of small files on a blog page into contiguous disk blocks, which benefits from the observed file access patterns. Trace-driven simulations show that, on average, the suggested approach achieves 60.6\% better system throughput and reduces the processing time for file access by 30.8\% compared to the best performance of the Ext4 filesystem.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wilson:2012:BSG, author = "Christo Wilson and Alessandra Sala and Krishna P. N. Puttaswamy and Ben Y. Zhao", title = "Beyond Social Graphs: User Interactions in Online Social Networks and their Implications", journal = j-TWEB, volume = "6", number = "4", pages = "17:1--17:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382620", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social networks are popular platforms for interaction, communication, and collaboration between friends. Researchers have recently proposed an emerging class of applications that leverage relationships from social networks to improve security and performance in applications such as email, Web browsing, and overlay routing. 
While these applications often cite social network connectivity statistics to support their designs, researchers in psychology and sociology have repeatedly cast doubt on the practice of inferring meaningful relationships from social network connections alone. This leads to the question: ``Are social links valid indicators of real user interaction? If not, then how can we quantify these factors to form a more accurate model for evaluating socially enhanced applications?'' In this article, we address this question through a detailed study of user interactions in the Facebook social network. We propose the use of ``interaction graphs'' to impart meaning to online social links by quantifying user interactions. We analyze interaction graphs derived from Facebook user traces and show that they exhibit significantly lower levels of the ``small-world'' properties present in their social graph counterparts. This means that these graphs have fewer ``supernodes'' with extremely high degree, and overall graph diameter increases significantly as a result. To quantify the impact of our observations, we use both types of graphs to validate several well-known social-based applications that rely on graph properties to infuse new functionality into Internet applications, including Reliable Email (RE), SybilGuard, and the weighted cascade influence maximization algorithm. 
The results reveal new insights into each of these systems, and confirm our hypothesis that to obtain realistic and accurate results, studies of social applications should use real indicators of user interactions in lieu of social graphs.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weerkamp:2012:EEC, author = "Wouter Weerkamp and Krisztian Balog and Maarten de Rijke", title = "Exploiting External Collections for Query Expansion", journal = j-TWEB, volume = "6", number = "4", pages = "18:1--18:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382621", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A persisting challenge in the field of information retrieval is the vocabulary mismatch between a user's information need and the relevant documents. One way of addressing this issue is to apply query modeling: to add terms to the original query and reweigh the terms. In social media, where documents usually contain creative and noisy language (e.g., spelling and grammatical errors), query modeling proves difficult. To address this, attempts to use external sources for query modeling have been made and seem to be successful. In this article we propose a general generative query expansion model that uses external document collections for term generation: the External Expansion Model (EEM). The main rationale behind our model is our hypothesis that each query requires its own mixture of external collections for expansion and that an expansion model should account for this. 
For some queries we expect, for example, a news collection to be most beneficial, while for other queries we could benefit more by selecting terms from a general encyclopedia. EEM allows for query-dependent weighing of the external collections. We put our model to the test on the task of blog post retrieval and we use four external collections in our experiments: (i) a news collection, (ii) a Web collection, (iii) Wikipedia, and (iv) a blog post collection. Experiments show that EEM outperforms query expansion on the individual collections, as well as the Mixture of Relevance Models that was previously proposed by Diaz and Metzler [2006]. Extensive analysis of the results shows that our naive approach to estimating query-dependent collection importance works reasonably well and that, when we use ``oracle'' settings, we see the full potential of our model. We also find that the query-dependent collection importance has more impact on retrieval performance than the independent collection importance (i.e., a collection prior).", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2013:MVC, author = "Ou Wu and Weiming Hu and Lei Shi", title = "Measuring the Visual Complexities of {Web} Pages", journal = j-TWEB, volume = "7", number = "1", pages = "1:1--1:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435216", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Visual complexities (VisComs) of Web pages significantly affect user experience, and automatic evaluation can facilitate a large number of Web-based applications. 
The construction of a model for measuring the VisComs of Web pages requires the extraction of typical features and learning based on labeled Web pages. However, as far as the authors are aware, little headway has been made on measuring VisCom in Web mining and machine learning. The present article provides a new approach combining Web mining techniques and machine learning algorithms for measuring the VisComs of Web pages. The structure of a Web page is first analyzed, and the layout is then extracted. Using a Web page as a semistructured image, three classes of features are extracted to construct a feature vector. The feature vector is fed into a learned measuring function to calculate the VisCom of the page. In the proposed approach of the present study, the type of the measuring function and its learning depend on the quantification strategy for VisCom. Aside from using a category and a score to represent VisCom as existing work, this study presents a new strategy utilizing a distribution to quantify the VisCom of a Web page. Empirical evaluation suggests the effectiveness of the proposed approach in terms of both features and learning algorithms.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hanson:2013:PWA, author = "Vicki L. Hanson and John T. 
Richards", title = "Progress on {Website} Accessibility?", journal = j-TWEB, volume = "7", number = "1", pages = "2:1--2:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435217", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Over 100 top-traffic and government websites from the United States and United Kingdom were examined for evidence of changes on accessibility indicators over the 14-year period from 1999 to 2012, the longest period studied to date. Automated analyses of WCAG 2.0 Level A Success Criteria found high percentages of violations overall. Unlike more circumscribed studies, however, these sites exhibited improvements over the years on a number of accessibility indicators, with government sites being less likely than topsites to have accessibility violations. Examination of the causes of success and failure suggests that improving accessibility may be due, in part, to changes in website technologies and coding practices rather than a focus on accessibility per se.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Baykan:2013:CST, author = "Eda Baykan and Monika Henzinger and Ingmar Weber", title = "A Comprehensive Study of Techniques for {URL}-Based {Web} Page Language Classification", journal = j-TWEB, volume = "7", number = "1", pages = "3:1--3:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435218", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given only the URL of a Web page, can we identify its language? 
In this article we examine this question. URL-based language classification is useful when the content of the Web page is not available or downloading the content is a waste of bandwidth and time. We built URL-based language classifiers for English, German, French, Spanish, and Italian by applying a variety of algorithms and features. As algorithms we used machine learning algorithms which are widely applied for text classification and state-of-art algorithms for language identification of text. As features we used words, various sized n-grams, and custom-made features (our novel feature set). We compared our approaches with two baseline methods, namely classification by country code top-level domains and classification by IP addresses of the hosting Web servers. We trained and tested our classifiers in a 10-fold cross-validation setup on a dataset obtained from the Open Directory Project and from querying a commercial search engine. We obtained the lowest F1-measure for English (94) and the highest F1-measure for German (98) with the best performing classifiers. We also evaluated the performance of our methods: (i) on a set of Web pages written in Adobe Flash and (ii) as part of a language-focused crawler. In the first case, the content of the Web page is hard to extract and in the second case downloading pages of the ``wrong'' language constitutes a waste of bandwidth. 
In both settings the best classifiers have a high accuracy with an F1-measure between 95 (for English) and 98 (for Italian) for the Adobe Flash pages and a precision between 90 (for Italian) and 97 (for French) for the language-focused crawler.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Marriott:2013:HAT, author = "Kim Marriott and Peter Moulder and Nathan Hurst", title = "{HTML} Automatic Table Layout", journal = j-TWEB, volume = "7", number = "1", pages = "4:1--4:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435219", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Automatic layout of tables is required in online applications because of the need to tailor the layout to the viewport width, choice of font, and dynamic content. However, if the table contains text, minimizing the height of the table for a fixed maximum width is NP-hard. Thus, more efficient heuristic algorithms are required. We evaluate the HTML table layout recommendation and find that while it generally produces quite compact layout it is brittle and can lead to quite uncompact layout. We present an alternate heuristic algorithm. It uses a greedy strategy that starts from the widest reasonable layout and repeatedly chooses to narrow the column for which narrowing leads to the least increase in table height. 
The algorithm is simple, fast enough to be used in online applications, and gives significantly more compact layout than is obtained with HTML's recommended table layout algorithm.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anisetti:2013:TBS, author = "Marco Anisetti and Claudio A. Ardagna and Ernesto Damiani and Francesco Saonara", title = "A test-based security certification scheme for {Web} services", journal = j-TWEB, volume = "7", number = "2", pages = "5:1--5:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460384", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Service-Oriented Architecture (SOA) paradigm is giving rise to a new generation of applications built by dynamically composing loosely coupled autonomous services. Clients (i.e., software agents acting on behalf of human users or service providers) implementing such complex applications typically search and integrate services on the basis of their functional requirements and of their trust in the service suppliers. A major issue in this scenario relates to the definition of an assurance technique allowing clients to select services on the basis of their nonfunctional requirements and increasing their confidence that the selected services will satisfy such requirements. In this article, we first present an assurance solution that focuses on security and supports a test-based security certification scheme for Web services. The certification scheme is driven by the security properties to be certified and relies upon a formal definition of the service model. 
The evidence supporting a certified property is computed using a model-based testing approach that, starting from the service model, automatically generates the test cases to be used in the service certification. We also define a set of indexes and metrics that evaluate the assurance level and the quality of the certification process. Finally, we present our evaluation toolkit and experimental results obtained applying our certification solution to a financial service implementing the Interactive Financial eXchange (IFX) standard.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Victor:2013:ETB, author = "Patricia Victor and Nele Verbiest and Chris Cornelis and Martine {De Cock}", title = "Enhancing the trust-based recommendation process with explicit distrust", journal = j-TWEB, volume = "7", number = "2", pages = "6:1--6:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460385", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "When a Web application with a built-in recommender offers a social networking component which enables its users to form a trust network, it can generate more personalized recommendations by combining user ratings with information from the trust network. These are the so-called trust-enhanced recommendation systems. While research on the incorporation of trust for recommendations is thriving, the potential of explicitly stated distrust remains almost unexplored. In this article, we introduce a distrust-enhanced recommendation algorithm which has its roots in Golbeck's trust-based weighted mean. 
Through experiments on a set of reviews from Epinions.com, we show that our new algorithm outperforms its standard trust-only counterpart with respect to accuracy, thereby demonstrating the positive effect that explicit distrust can have on trust-based recommendations.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Yue:2013:MSI, author = "Chuan Yue and Haining Wang", title = "A measurement study of insecure {JavaScript} practices on the {Web}", journal = j-TWEB, volume = "7", number = "2", pages = "7:1--7:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460386", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "JavaScript is an interpreted programming language most often used for enhancing webpage interactivity and functionality. It has powerful capabilities to interact with webpage documents and browser windows, however, it has also opened the door for many browser-based security attacks. Insecure engineering practices of using JavaScript may not directly lead to security breaches, but they can create new attack vectors and greatly increase the risks of browser-based attacks. In this article, we present the first measurement study on insecure practices of using JavaScript on the Web. Our focus is on the insecure practices of JavaScript inclusion and dynamic generation, and we examine their severity and nature on 6,805 unique websites. 
Our measurement results reveal that insecure JavaScript practices are common at various websites: (1) at least 66.4\% of the measured websites manifest the insecure practices of including JavaScript files from external domains into the top-level documents of their webpages; (2) over 44.4\% of the measured websites use the dangerous eval() function to dynamically generate and execute JavaScript code on their webpages; and (3) in JavaScript dynamic generation, using the document.write() method and the innerHTML property is much more popular than using the relatively secure technique of creating script elements via DOM methods. Our analysis indicates that safe alternatives to these insecure practices exist in common cases and ought to be adopted by website developers and administrators for reducing potential security risks.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2013:UQI, author = "Weifeng Su and Hejun Wu and Yafei Li and Jing Zhao and Frederick H. Lochovsky and Hongmin Cai and Tianqiang Huang", title = "Understanding query interfaces by statistical parsing", journal = j-TWEB, volume = "7", number = "2", pages = "8:1--8:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460387", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Users submit queries to an online database via its query interface. Query interface parsing, which is important for many applications, understands the query capabilities of a query interface. Since most query interfaces are organized hierarchically, we present a novel query interface parsing method, StatParser (Statistical Parser), to automatically extract the hierarchical query capabilities of query interfaces. 
StatParser automatically learns from a set of parsed query interfaces and parses new query interfaces. StatParser starts from a small grammar and enhances the grammar with a set of probabilities learned from parsed query interfaces under the maximum-entropy principle. Given a new query interface, the probability-enhanced grammar identifies the parse tree with the largest global probability to be the query capabilities of the query interface. Experimental results show that StatParser very accurately extracts the query capabilities and can effectively overcome the problems of existing query interface parsers.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Diaz:2013:LEU, author = "Oscar D{\'\i}az and Crist{\'o}bal Arellano and Maider Azanza", title = "A language for end-user {Web} augmentation: Caring for producers and consumers alike", journal = j-TWEB, volume = "7", number = "2", pages = "9:1--9:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460388", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web augmentation is to the Web what augmented reality is to the physical world: layering relevant content/layout/navigation over the existing Web to customize the user experience. This is achieved through JavaScript (JS) using browser weavers (e.g., Greasemonkey). To date, over 43 million of downloads of Greasemonkey scripts ground the vitality of this movement. However, Web augmentation is hindered by being programming intensive and prone to malware. This prevents end-users from participating as both producers and consumers of scripts: producers need to know JS, consumers need to trust JS. 
This article aims at promoting end-user participation in both roles. The vision is for end-users to prosume (the act of simultaneously caring for producing and consuming) scripts as easily as they currently prosume their pictures or videos. Encouraging production requires more natural'' and abstract constructs. Promoting consumption calls for augmentation scripts to be easier to understand, share, and trust upon. To this end, we explore the use of Domain-Specific Languages (DSLs) by introducing Sticklet. Sticklet is an internal DSL on JS, where JS generality is reduced for the sake of learnability and reliability. Specifically, Web augmentation is conceived as fixing in existing web sites (i.e., the wall ) HTML fragments extracted from either other sites or Web services (i.e., the stickers ). Sticklet targets hobby programmers as producers, and computer literates as consumers. From a producer perspective, benefits are threefold. As a restricted grammar on top of JS, Sticklet expressions are domain oriented and more declarative than their JS counterparts, hence speeding up development. As syntactically correct JS expressions, Sticklet scripts can be installed as traditional scripts and hence, programmers can continue using existing JS tools. As declarative expressions, they are easier to maintain, and amenable for optimization. From a consumer perspective, domain specificity brings understandability (due to declarativeness), reliability (due to built-in security), and consumability'' (i.e., installation/enactment/sharing of Sticklet expressions are tuned to the shortage of time and skills of the target audience). Preliminary evaluations indicate that 77\% of the subjects were able to develop new Sticklet scripts in less than thirty minutes while 84\% were able to consume these scripts in less than ten minutes. 
Sticklet is available to download as a Mozilla add-on.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kaldeli:2013:CWS, author = "Eirini Kaldeli and Ehsan Ullah Warriach and Alexander Lazovik and Marco Aiello", title = "Coordinating the web of services for a smart home", journal = j-TWEB, volume = "7", number = "2", pages = "10:1--10:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460389", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Domotics, concerned with the realization of intelligent home environments, is a novel field which can highly benefit from solutions inspired by service-oriented principles to enhance the convenience and security of modern home residents. In this work, we present an architecture for a smart home, starting from the lower device interconnectivity level up to the higher application layers that undertake the load of complex functionalities and provide a number of services to end-users. We claim that in order for smart homes to exhibit a genuinely intelligent behavior, the ability to compute compositions of individual devices automatically and dynamically is paramount. To this end, we incorporate into the architecture a composition component that employs artificial intelligence domain-independent planning to generate compositions at runtime, in a constantly evolving environment. We have implemented a fully working prototype that realizes such an architecture, and have evaluated it both in terms of performance as well as from the end-user point of view. 
The results of the evaluation show that the service-oriented architectural design and the support for dynamic compositions is quite efficient from the technical point of view, and that the system succeeds in satisfying the expectations and objectives of the users.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Balakrishnan:2013:ART, author = "Raju Balakrishnan and Subbarao Kambhampati and Manishkumar Jha", title = "Assessing relevance and trust of the deep web sources and results based on inter-source agreement", journal = j-TWEB, volume = "7", number = "2", pages = "11:1--11:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460390", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Deep web search engines face the formidable challenge of retrieving high-quality results from the vast collection of searchable databases. Deep web search is a two-step process of selecting the high-quality sources and ranking the results from the selected sources. Though there are existing methods for both the steps, they assess the relevance of the sources and the results using the query-result similarity. When applied to the deep web these methods have two deficiencies. First is that they are agnostic to the correctness (trustworthiness) of the results. Second, the query-based relevance does not consider the importance of the results and sources. These two considerations are essential for the deep web and open collections in general. Since a number of deep web sources provide answers to any query, we conjuncture that the agreements between these answers are helpful in assessing the importance and the trustworthiness of the sources and the results. 
For assessing source quality, we compute the agreement between the sources as the agreement of the answers returned. While computing the agreement, we also measure and compensate for the possible collusion between the sources. This adjusted agreement is modeled as a graph with sources at the vertices. On this agreement graph, a quality score of a source, that we call SourceRank, is calculated as the stationary visit probability of a random walk. For ranking results, we analyze the second-order agreement between the results. Further extending SourceRank to multidomain search, we propose a source ranking sensitive to the query domains. Multiple domain-specific rankings of a source are computed, and these ranks are combined for the final ranking. We perform extensive evaluations on online and hundreds of Google Base sources spanning across domains. The proposed result and source rankings are implemented in the deep web search engine Factal. We demonstrate that the agreement analysis tracks source corruption. Further, our relevance evaluations show that our methods improve precision significantly over Google Base and the other baseline methods. 
The result ranking and the domain-specific source ranking are evaluated separately.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Nguyen:2013:FWT, author = "Cam-Tu Nguyen and Natsuda Kaothanthong and Takeshi Tokuyama and Xuan-Hieu Phan", title = "A feature-word-topic model for image annotation and retrieval", journal = j-TWEB, volume = "7", number = "3", pages = "12:1--12:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516634", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Image annotation is a process of finding appropriate semantic labels for images in order to obtain a more convenient way for indexing and searching images on the Web. This article proposes a novel method for image annotation based on combining feature-word distributions, which map from visual space to word space, and word-topic distributions, which form a structure to capture label relationships for annotation. We refer to this type of model as Feature-Word-Topic models. The introduction of topics allows us to efficiently take word associations, such as {ocean, fish, coral} or {desert, sand, cactus}, into account for image annotation. Unlike previous topic-based methods, we do not consider topics as joint distributions of words and visual features, but as distributions of words only. Feature-word distributions are utilized to define weights in computation of topic distributions for annotation. By doing so, topic models in text mining can be applied directly in our method. 
Our Feature-word-topic model, which exploits Gaussian Mixtures for feature-word distributions, and probabilistic Latent Semantic Analysis (pLSA) for word-topic distributions, shows that our method is able to obtain promising results in image annotation and retrieval.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vargiu:2013:ICA, author = "Eloisa Vargiu and Alessandro Giuliani and Giuliano Armano", title = "Improving contextual advertising by adopting collaborative filtering", journal = j-TWEB, volume = "7", number = "3", pages = "13:1--13:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516635", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising can be viewed as an information filtering task aimed at selecting suitable ads to be suggested to the final user'', that is, the Web page in hand. Starting from this insight, in this article we propose a novel system, which adopts a collaborative filtering approach to perform contextual advertising. In particular, given a Web page, the system relies on collaborative filtering to classify the page content and to suggest suitable ads accordingly. Useful information is extracted from inlinks'', that is, similar pages that link to the Web page in hand. In so doing, collaborative filtering is used in a content-based setting, giving rise to a hybrid contextual advertising system. After being implemented, the system has been experimented with about 15000 Web pages extracted from the Open Directory Project. Comparative experiments with a content-based system have been performed. The corresponding results highlight that the proposed system performs better. 
A suitable case study is also provided to enable the reader to better understand how the system works and its effectiveness.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Conti:2013:VPS, author = "Mauro Conti and Arbnor Hasani and Bruno Crispo", title = "Virtual private social networks and a {Facebook} implementation", journal = j-TWEB, volume = "7", number = "3", pages = "14:1--14:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516636", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The popularity of Social Networking Sites (SNS) is growing rapidly, with the largest sites serving hundreds of millions of users and their private information. The privacy settings of these SNSs do not allow the user to avoid sharing some information (e.g., name and profile picture) with all the other users. Also, no matter the privacy settings, this information is always shared with the SNS (that could sell this information or be hacked). To mitigate these threats, we recently introduced the concept of Virtual Private Social Networks (VPSNs). In this work we propose the first complete architecture and implementation of VPSNs for Facebook. In particular, we address an important problem left unexplored in our previous research-that is the automatic propagation of updated profiles to all the members of the same VPSN. Furthermore, we made an in-depth study on performance and implemented several optimization to reduce the impact of VPSN on user experience. 
The proposed solution is lightweight, completely distributed, does not depend on the collaboration from Facebook, does not have a central point of failure, it offers (with some limitations) the same functionality as Facebook, and apart from some simple settings, the solution is almost transparent to the user. Thorough experiments, with an extended set of parameters, we have confirmed the feasibility of the proposal and have shown a very limited time-overhead experienced by the user while browsing Facebook pages.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2013:TBI, author = "B. Barla Cambazoglu and Enver Kayaaslan and Simon Jonassen and Cevdet Aykanat", title = "A term-based inverted index partitioning model for efficient distributed query processing", journal = j-TWEB, volume = "7", number = "3", pages = "15:1--15:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516637", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In a shared-nothing, distributed text retrieval system, queries are processed over an inverted index that is partitioned among a number of index servers. In practice, the index is either document-based or term-based partitioned. This choice is made depending on the properties of the underlying hardware infrastructure, query traffic distribution, and some performance and availability constraints. In query processing on retrieval systems that adopt a term-based index partitioning strategy, the high communication overhead due to the transfer of large amounts of data from the index servers forms a major performance bottleneck, deteriorating the scalability of the entire distributed retrieval system. 
In this work, to alleviate this problem, we propose a novel inverted index partitioning model that relies on hypergraph partitioning. In the proposed model, concurrently accessed index entries are assigned to the same index servers, based on the inverted index access patterns extracted from the past query logs. The model aims to minimize the communication overhead that will be incurred by future queries while maintaining the computational load balance among the index servers. We evaluate the performance of the proposed model through extensive experiments using a real-life text collection and a search query sample. Our results show that considerable performance gains can be achieved relative to the term-based index partitioning strategies previously proposed in literature. In most cases, however, the performance remains inferior to that attained by document-based partitioning.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weninger:2013:PPF, author = "Tim Weninger and Thomas J. Johnston and Jiawei Han", title = "The parallel path framework for entity discovery on the web", journal = j-TWEB, volume = "7", number = "3", pages = "16:1--16:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516638", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It has been a dream of the database and Web communities to reconcile the unstructured nature of the World Wide Web with the neat, structured schemas of the database paradigm. Even though databases are currently used to generate Web content in some sites, the schemas of these databases are rarely consistent across a domain. 
This makes the comparison and aggregation of information from different domains difficult. We aim to make an important step towards resolving this disparity by using the structural and relational information on the Web to (1) extract Web lists, (2) find entity-pages, (3) map entity-pages to a database, and (4) extract attributes of the entities. Specifically, given a Web site and an entity-page (e.g., university department and faculty member home page) we seek to find all of the entity-pages of the same type (e.g., all faculty members in the department), as well as attributes of the specific entities (e.g., their phone numbers, email addresses, office numbers). To do this, we propose a Web structure mining method which grows parallel paths through the Web graph and DOM trees and propagates relevant attribute information forward. We show that by utilizing these parallel paths we can efficiently discover entity-pages and attributes. Finally, we demonstrate the accuracy of our method with a large case study.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2013:SCB, author = "Liwei Liu and Freddy Lecue and Nikolay Mehandjiev", title = "Semantic content-based recommendation of software services using context", journal = j-TWEB, volume = "7", number = "3", pages = "17:1--17:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516639", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The current proliferation of software services means users should be supported when selecting one service out of the many which meet their needs. 
Recommender Systems provide such support for selecting products and conventional services, yet their direct application to software services is not straightforward, because of the current scarcity of available user feedback, and the need to fine-tune software services to the context of intended use. In this article, we address these issues by proposing a semantic content-based recommendation approach that analyzes the context of intended service use to provide effective recommendations in conditions of scarce user feedback. The article ends with two experiments based on a realistic set of semantic services. The first experiment demonstrates how the proposed semantic content-based approach can produce effective recommendations using semantic reasoning over service specifications by comparing it with three other approaches. The second experiment demonstrates the effectiveness of the proposed context analysis mechanism by comparing the performance of both context-aware and plain versions of our semantic content-based approach, benchmarked against user-performed selection informed by context.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jiang:2013:ULI, author = "Jing Jiang and Christo Wilson and Xiao Wang and Wenpeng Sha and Peng Huang and Yafei Dai and Ben Y. Zhao", title = "Understanding latent interactions in online social networks", journal = j-TWEB, volume = "7", number = "4", pages = "18:1--18:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2517040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Popular online social networks (OSNs) like Facebook and Twitter are changing the way users communicate and interact with the Internet. 
A deep understanding of user interactions in OSNs can provide important insights into questions of human social behavior and into the design of social platforms and applications. However, recent studies have shown that a majority of user interactions on OSNs are latent interactions, that is, passive actions, such as profile browsing, that cannot be observed by traditional measurement techniques. In this article, we seek a deeper understanding of both active and latent user interactions in OSNs. For quantifiable data on latent user interactions, we perform a detailed measurement study on Renren, the largest OSN in China with more than 220 million users to date. All friendship links in Renren are public, allowing us to exhaustively crawl a connected graph component of 42 million users and 1.66 billion social links in 2009. Renren also keeps detailed, publicly viewable visitor logs for each user profile. We capture detailed histories of profile visits over a period of 90 days for users in the Peking University Renren network and use statistics of profile visits to study issues of user profile popularity, reciprocity of profile visits, and the impact of content updates on user popularity. We find that latent interactions are much more prevalent and frequent than active events, are nonreciprocal in nature, and that profile popularity is correlated with page views of content rather than with quantity of content updates. 
Finally, we construct latent interaction graphs as models of user browsing behavior and compare their structural properties, evolution, community structure, and mixing times against those of both active interaction graphs and social graphs.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Quarteroni:2013:BKA, author = "Silvia Quarteroni and Marco Brambilla and Stefano Ceri", title = "A bottom-up, knowledge-aware approach to integrating and querying {Web} data services", journal = j-TWEB, volume = "7", number = "4", pages = "19:1--19:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2493536", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As a wealth of data services is becoming available on the Web, building and querying Web applications that effectively integrate their content is increasingly important. However, schema integration and ontology matching with the aim of registering data services often requires a knowledge-intensive, tedious, and error-prone manual process. We tackle this issue by presenting a bottom-up, semi-automatic service registration process that refers to an external knowledge base and uses simple text processing techniques in order to minimize and possibly avoid the contribution of domain experts in the annotation of data services. The first by-product of this process is a representation of the domain of data services as an entity-relationship diagram, whose entities are named after concepts of the external knowledge base matching service terminology rather than being manually created to accommodate an application-specific ontology. 
Second, a three-layer annotation of service semantics (service interfaces, access patterns, service marts) describing how services play'' with such domain elements is also automatically constructed at registration time. When evaluated against heterogeneous existing data services and with a synthetic service dataset constructed using Google Fusion Tables, the approach yields good results in terms of data representation accuracy. We subsequently demonstrate that natural language processing methods can be used to decompose and match simple queries to the data services represented in three layers according to the preceding methodology with satisfactory results. We show how semantic annotations are used at query time to convert the user's request into an executable logical query. Globally, our findings show that the proposed registration method is effective in creating a uniform semantic representation of data services, suitable for building Web applications and answering search queries.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Leiva:2013:WBB, author = "Luis A. Leiva and Roberto Viv{\'o}", title = "{Web} browsing behavior analysis and interactive hypervideo", journal = j-TWEB, volume = "7", number = "4", pages = "20:1--20:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2529995.2529996", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Processing data on any sort of user interaction is well known to be cumbersome and mostly time consuming. In order to assist researchers in easily inspecting fine-grained browsing data, current tools usually display user interactions as mouse cursor tracks, a video-like visualization scheme. 
However, to date, traditional online video inspection has not explored the full capabilities of hypermedia and interactive techniques. In response to this need, we have developed SMT2$\epsilon$, a Web-based tracking system for analyzing browsing behavior using feature-rich hypervideo visualizations. We compare our system to related work in academia and the industry, showing that ours features unprecedented visualization capabilities. We also show that SMT2$\epsilon$ efficiently captures browsing data and is perceived by users to be both helpful and usable. A series of prediction experiments illustrate that raw cursor data are accessible and can be easily handled, providing evidence that the data can be used to construct and verify research hypotheses. Considering its limitations, it is our hope that SMT2$\epsilon$ will assist researchers, usability practitioners, and other professionals interested in understanding how users browse the Web.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bing:2013:RDS, author = "Lidong Bing and Wai Lam and Tak-Lam Wong", title = "Robust detection of semi-structured web records using a {DOM} structure-knowledge-driven model", journal = j-TWEB, volume = "7", number = "4", pages = "21:1--21:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2508434", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web data record extraction aims at extracting a set of similar object records from a single webpage. These records have similar attributes or fields and are presented with a regular format in a coherent region of the page. To tackle this problem, most existing works analyze the DOM tree of an input page. 
One major limitation of these methods is that the lack of a global view in detecting data records from an input page results in a myopic decision. Their brute-force searching manner in detecting various types of records degrades the flexibility and robustness. We propose a Structure-Knowledge-Oriented Global Analysis (Skoga) framework which can perform robust detection of different kinds of data records and record regions. The major component of the Skoga framework is a DOM structure-knowledge-driven detection model which can conduct a global analysis on the DOM structure to achieve effective detection. The DOM structure knowledge consists of background knowledge as well as statistical knowledge capturing different characteristics of data records and record regions, as exhibited in the DOM structure. The background knowledge encodes the semantics of labels indicating general constituents of data records and regions. The statistical knowledge is represented by some carefully designed features that capture different characteristics of a single node or a node group in the DOM. The feature weights are determined using a development dataset via a parameter estimation algorithm based on a structured output support vector machine. An optimization method based on the divide-and-conquer principle is developed making use of the DOM structure knowledge to quantitatively infer and recognize appropriate records and regions for a page. Extensive experiments have been conducted on four datasets. 
The experimental results demonstrate that our framework achieves higher accuracy compared with state-of-the-art methods.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liao:2013:VAC, author = "Zhen Liao and Daxin Jiang and Jian Pei and Yalou Huang and Enhong Chen and Huanhuan Cao and Hang Li", title = "A {vlHMM} approach to context-aware search", journal = j-TWEB, volume = "7", number = "4", pages = "22:1--22:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2490255", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Capturing the context of a user's query from the previous queries and clicks in the same session leads to a better understanding of the user's information need. A context-aware approach to document reranking, URL recommendation, and query suggestion may substantially improve users' search experience. In this article, we propose a general approach to context-aware search by learning a variable length hidden Markov model ( vlHMM ) from search sessions extracted from log data. While the mathematical model is powerful, the huge amounts of log data present great challenges. We develop several distributed learning techniques to learn a very large vlHMM under the map-reduce framework. Moreover, we construct feature vectors for each state of the vlHMM model to handle users' novel queries not covered by the training data. We test our approach on a raw dataset consisting of 1.9 billion queries, 2.9 billion clicks, and 1.2 billion search sessions before filtering, and evaluate the effectiveness of the vlHMM learned from the real data on three search applications: document reranking, query suggestion, and URL recommendation. 
The experiment results validate the effectiveness of vlHMM in the applications of document reranking, URL recommendation, and query suggestion.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{White:2013:CBD, author = "Ryen W. White and Eric Horvitz", title = "Captions and biases in diagnostic search", journal = j-TWEB, volume = "7", number = "4", pages = "23:1--23:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2486040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "People frequently turn to the Web with the goal of diagnosing medical symptoms. Studies have shown that diagnostic search can often lead to anxiety about the possibility that symptoms are explained by the presence of rare, serious medical disorders, rather than far more common benign syndromes. We study the influence of the appearance of potentially-alarming content, such as severe illnesses or serious treatment options associated with the queried-for symptoms, in captions comprising titles, snippets, and URLs. We explore whether users are drawn to results with potentially-alarming caption content, and if so, the implications of such attraction for the design of search engines. We specifically study the influence of the content of search result captions shown in response to symptom searches on search-result click-through behavior. We show that users are significantly more likely to examine and click on captions containing potentially-alarming medical terminology such as ``heart attack'' or ``medical emergency'' independent of result rank position and well-known positional biases in users' search examination behaviors. 
The findings provide insights about the possible effects of displaying implicit correlates of searchers' goals in search-result captions, such as unexpressed concerns and fears. As an illustration of the potential utility of these results, we developed and evaluated an enhanced click prediction model that incorporates potentially-alarming caption features and show that it significantly outperforms models that ignore caption content. Beyond providing additional understanding of the effects of Web content on medical concerns, the methods and findings have implications for search engine design. As part of our discussion on the implications of this research, we propose procedures for generating more representative captions that may be less likely to cause alarm, as well as methods for learning to more appropriately rank search results from logged search behavior, for examples, by also considering the presence of potentially-alarming content in the captions that motivate observed clicks and down-weighting clicks seemingly driven by searchers' health anxieties.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lee:2013:SCA, author = "Jung-Hyun Lee and Jongwoo Ha and Jin-Yong Jung and Sangkeun Lee", title = "Semantic contextual advertising based on the open directory project", journal = j-TWEB, volume = "7", number = "4", pages = "24:1--24:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2529995.2529997", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising seeks to place relevant textual ads within the content of generic webpages. In this article, we explore a novel semantic approach to contextual advertising. 
This consists of three tasks: (1) building a well-organized hierarchical taxonomy of topics, (2) developing a robust classifier for effectively finding the topics of pages and ads, and (3) ranking ads based on the topical relevance to pages. First, we heuristically build our own taxonomy of topics from the Open Directory Project (ODP). Second, we investigate how to increase classification accuracy by taking the unique characteristics of the ODP into account. Last, we measure the topical relevance of ads by applying a link analysis technique to the similarity graph carefully derived from our taxonomy. Experiments show that our classification method improves the performance of Ma-F$_1$ by as much as 25.7\% over the baseline classifier. In addition, our ranking method enhances the relevance of ads substantially, up to 10\% in terms of precision at k, compared to a representative strategy.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Huang:2013:UEQ, author = "Xiaodi Huang", title = "{UsageQoS}: Estimating the {QoS} of {Web} Services through Online User Communities", journal = j-TWEB, volume = "8", number = "1", pages = "1:1--1:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2532635", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Services are an indispensable component in cloud computing. Web services are particularly important. As an increasing number of Web services provides equivalent functions, one common issue faced by users is the selection of the most appropriate one based on quality. 
This article presents a conceptual framework that characterizes the quality of Web services, an algorithm that quantifies them, and a system architecture that ranks Web services by using the proposed algorithm. In particular, the algorithm, called UsageQoS that computes the scores of quality of service (QoS) of Web services within a community, makes use of the usage frequencies of Web services. The frequencies are defined as the numbers of times invoked by other services in a given time period. The UsageQoS algorithm is able to optionally take user ratings as its initial input. The proposed approach has been validated by extensively experimenting on several datasets, including two real datasets. The results of the experiments have demonstrated that our approach is capable of estimating QoS parameters of Web services, regardless of whether user ratings are available or not.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weber:2013:FBW, author = "Ingo Weber and Hye-Young Paik and Boualem Benatallah", title = "Form-Based {Web} Service Composition for Domain Experts", journal = j-TWEB, volume = "8", number = "1", pages = "2:1--2:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2542168", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In many cases, it is not cost effective to automate business processes which affect a small number of people and/or change frequently. We present a novel approach for enabling domain experts to model and deploy such processes from their respective domain as Web service compositions. The approach builds on user-editable service, naming and representing Web services as forms. 
On this basis, the approach provides a visual composition language with a targeted restriction of control-flow expressivity, process simulation, automated process verification mechanisms, and code generation for executing orchestrations. A Web-based service composition prototype implements this approach, including a WS-BPEL code generator. A small lab user study with 14 participants showed promising results for the usability of the system, even for nontechnical domain experts.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ozcan:2013:SCH, author = "Rifat Ozcan and Ismail Sengor Altingovde and B. Barla Cambazoglu and {\"O}zg{\"u}r Ulusoy", title = "Second Chance: a Hybrid Approach for Dynamic Result Caching and Prefetching in Search Engines", journal = j-TWEB, volume = "8", number = "1", pages = "3:1--3:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536777", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web search engines are known to cache the results of previously issued queries. The stored results typically contain the document summaries and some data that is used to construct the final search result page returned to the user. An alternative strategy is to store in the cache only the result document IDs, which take much less space, allowing results of more queries to be cached. These two strategies lead to an interesting trade-off between the hit rate and the average query response latency. In this work, in order to exploit this trade-off, we propose a hybrid result caching strategy where a dynamic result cache is split into two sections: an HTML cache and a docID cache. 
Moreover, using a realistic cost model, we evaluate the performance of different result prefetching strategies for the proposed hybrid cache and the baseline HTML-only cache. Finally, we propose a machine learning approach to predict singleton queries, which occur only once in the query stream. We show that when the proposed hybrid result caching strategy is coupled with the singleton query predictor, the hit rate is further improved.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sherkat:2013:ETS, author = "Reza Sherkat and Jing Li and Nikos Mamoulis", title = "Efficient Time-Stamped Event Sequence Anonymization", journal = j-TWEB, volume = "8", number = "1", pages = "4:1--4:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2532643", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the rapid growth of applications which generate timestamped sequences (click streams, GPS trajectories, RFID sequences), sequence anonymization has become an important problem, in that should such data be published or shared. Existing trajectory anonymization techniques disregard the importance of time or the sensitivity of events. This article is the first, to our knowledge, thorough study on time-stamped event sequence anonymization. We propose a novel and tunable generalization framework tailored to event sequences. We generalize time stamps using time intervals and events using a taxonomy which models the domain semantics. 
We consider two scenarios: (i) sharing the data with a single receiver (the SSR setting), where the receiver's background knowledge is confined to a set of time stamps and time generalization suffices, and (ii) sharing the data with colluding receivers (the SCR setting), where time generalization should be combined with event generalization. For both cases, we propose appropriate anonymization methods that prevent both user identification and event prediction. To achieve computational efficiency and scalability, we propose optimization techniques for both cases using a utility-based index, compact summaries, fast to compute bounds for utility, and a novel taxonomy-aware distance function. Extensive experiments confirm the effectiveness of our approach compared with state of the art, in terms of information loss, range query distortion, and preserving temporal causality patterns. Furthermore, our experiments demonstrate efficiency and scalability on large-scale real and synthetic datasets.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bellido:2013:CFP, author = "Jesus Bellido and Rosa Alarc{\'o}n and Cesare Pautasso", title = "Control-Flow Patterns for Decentralized {RESTful} Service Composition", journal = j-TWEB, volume = "8", number = "1", pages = "5:1--5:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535911", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The REST architectural style has attracted a lot of interest from industry due to the nonfunctional properties it contributes to Web-based solutions. 
SOAP/WSDL-based services, on the other hand, provide tools and methodologies that allow the design and development of software supporting complex service arrangements, enabling complex business processes which make use of well-known control-flow patterns. It is not clear if and how such patterns should be modeled, considering RESTful Web services that comply with the statelessness, uniform interface and hypermedia constraints. In this article, we analyze a set of fundamental control-flow patterns in the context of stateless compositions of RESTful services. We propose a means of enabling their implementation using the HTTP protocol and discuss the impact of our design choices according to key REST architectural principles. We hope to shed new light on the design of basic building blocks for RESTful business processes.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chelaru:2013:ADE, author = "Sergiu Chelaru and Ismail Sengor Altingovde and Stefan Siersdorfer and Wolfgang Nejdl", title = "Analyzing, Detecting, and Exploiting Sentiment in {Web} Queries", journal = j-TWEB, volume = "8", number = "1", pages = "6:1--6:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535525", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web contains an increasing amount of biased and opinionated documents on politics, products, and polarizing events. In this article, we present an indepth analysis of Web search queries for controversial topics, focusing on query sentiment. 
To this end, we conduct extensive user assessments and discriminative term analyses, as well as a sentiment analysis using the SentiWordNet thesaurus, a lexical resource containing sentiment annotations. Furthermore, in order to detect the sentiment expressed in queries, we build different classifiers based on query texts, query result titles, and snippets. We demonstrate the virtue of query sentiment detection in two different use cases. First, we define a query recommendation scenario that employs sentiment detection of results to recommend additional queries for polarized queries issued by search engine users. The second application scenario is controversial topic discovery, where query sentiment classifiers are employed to discover previously unknown topics that trigger both highly positive and negative opinions among the users of a search engine. For both use cases, the results of our evaluations on real-world data are promising and show the viability and potential of query sentiment analysis in practical scenarios.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Torres:2014:ASB, author = "Sergio Duarte Torres and Ingmar Weber and Djoerd Hiemstra", title = "Analysis of Search and Browsing Behavior of Young Users on the {Web}", journal = j-TWEB, volume = "8", number = "2", pages = "7:1--7:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2555595", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Internet is increasingly used by young children for all kinds of purposes. Nonetheless, there are not many resources especially designed for children on the Internet and most of the content online is designed for grown-up users. 
This situation is problematic if we consider the large differences between young users and adults since their topic interests, computer skills, and language capabilities evolve rapidly during childhood. There is little research aimed at exploring and measuring the difficulties that children encounter on the Internet when searching for information and browsing for content. In the first part of this work, we employed query logs from a commercial search engine to quantify the difficulties children of different ages encounter on the Internet and to characterize the topics that they search for. We employed query metrics (e.g., the fraction of queries posed in natural language), session metrics (e.g., the fraction of abandoned sessions), and click activity (e.g., the fraction of ad clicks). The search logs were also used to retrace stages of child development. Concretely, we looked for changes in interests (e.g., the distribution of topics searched) and language development (e.g., the readability of the content accessed and the vocabulary size). In the second part of this work, we employed toolbar logs from a commercial search engine to characterize the browsing behavior of young users, particularly to understand the activities on the Internet that trigger search. We quantified the proportion of browsing and search activity in the toolbar sessions and we estimated the likelihood of a user to carry out search on the Web vertical and multimedia verticals (i.e., videos and images) given that the previous event is another search event or a browsing event. We observed that these metrics clearly demonstrate an increased level of confusion and unsuccessful search sessions among children. We also found a clear relation between the reading level of the clicked pages and characteristics of the users such as age and educational attainment. 
In terms of browsing behavior, children were found to start their activities on the Internet with a search engine (instead of directly browsing content) more often than adults. We also observed a significantly larger amount of browsing activity for the case of teenager users. Interestingly we also found that if children visit knowledge-related Web sites (i.e., information-dense pages such as Wikipedia articles), they subsequently do more Web searches than adults. Additionally, children and especially teenagers were found to have a greater tendency to engage in multimedia search, which calls to improve the aggregation of multimedia results into the current search result pages.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2014:HIY, author = "Ao-Jan Su and Y. Charlie Hu and Aleksandar Kuzmanovic and Cheng-Kok Koh", title = "How to Improve Your Search Engine Ranking: Myths and Reality", journal = j-TWEB, volume = "8", number = "2", pages = "8:1--8:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579990", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Search engines have greatly influenced the way people access information on the Internet, as such engines provide the preferred entry point to billions of pages on the Web. Therefore, highly ranked Web pages generally have higher visibility to people and pushing the ranking higher has become the top priority for Web masters. As a matter of fact, Search Engine Optimization (SEO) has became a sizeable business that attempts to improve their clients' ranking. Still, the lack of ways to validate SEO's methods has created numerous myths and fallacies associated with ranking algorithms. 
In this article, we focus on two ranking algorithms, Google's and Bing's, and design, implement, and evaluate a ranking system to systematically validate assumptions others have made about these popular ranking algorithms. We demonstrate that linear learning models, coupled with a recursive partitioning ranking scheme, are capable of predicting ranking results with high accuracy. As an example, we manage to correctly predict 7 out of the top 10 pages for 78\% of evaluated keywords. Moreover, for content-only ranking, our system can correctly predict 9 or more pages out of the top 10 ones for 77\% of search terms. We show how our ranking system can be used to reveal the relative importance of ranking features in a search engine's ranking function, provide guidelines for SEOs and Web masters to optimize their Web pages, validate or disprove new ranking features, and evaluate search engine ranking results for possible ranking bias.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sirivianos:2014:LSF, author = "Michael Sirivianos and Kyungbaek Kim and Jian Wei Gan and Xiaowei Yang", title = "Leveraging Social Feedback to Verify Online Identity Claims", journal = j-TWEB, volume = "8", number = "2", pages = "9:1--9:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2543711", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Anonymity is one of the main virtues of the Internet, as it protects privacy and enables users to express opinions more freely. However, anonymity hinders the assessment of the veracity of assertions that online users make about their identity attributes, such as age or profession. 
We propose FaceTrust, a system that uses online social networks to provide lightweight identity credentials while preserving a user's anonymity. FaceTrust employs a ``game with a purpose'' design to elicit the opinions of the friends of a user about the user's self-claimed identity attributes, and uses attack-resistant trust inference to assign veracity scores to identity attribute assertions. FaceTrust provides credentials, which a user can use to corroborate his assertions. We evaluate our proposal using a live Facebook deployment and simulations on a crawled social graph. The results show that our veracity scores are strongly correlated with the ground truth, even when dishonest users make up a large fraction of the social network and employ the Sybil attack.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Pugliese:2014:EMM, author = "Andrea Pugliese and Matthias Br{\"o}cheler and V. S. Subrahmanian and Michael Ovelg{\"o}nne", title = "Efficient {MultiView} Maintenance under Insertion in Huge Social Networks", journal = j-TWEB, volume = "8", number = "2", pages = "10:1--10:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2541290", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Applications to monitor various aspects of social networks are becoming increasingly popular. For instance, marketers want to look for semantic patterns relating to the content of tweets and Facebook posts relating to their products. Law enforcement agencies want to track behaviors involving potential criminals on the Internet by looking for certain patterns of behavior. Music companies want to track patterns of spread of illegal music. 
These applications allow multiple users to specify patterns of interest and monitor them in real time as new data gets added to the Web or to a social network. In this article we develop the concept of social network view servers in which all of these types of applications can be simultaneously monitored. The patterns of interest are expressed as views over an underlying graph or social network database. We show that a given set of views can be compiled in multiple possible ways to take advantage of common substructures and define the concept of an optimal merge. Though finding an optimal merge is shown to be NP-hard, we develop the AddView algorithm to find very good merges quickly. We develop a very fast MultiView algorithm that scalably and efficiently maintains multiple subgraph views when insertions are made to the social network database. We show that our algorithm is correct, study its complexity, and experimentally demonstrate that our algorithm can scalably handle updates to hundreds of views on 6 real-world social network databases with up to 540M edges.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bislimovska:2014:TCB, author = "Bojana Bislimovska and Alessandro Bozzon and Marco Brambilla and Piero Fraternali", title = "Textual and Content-Based Search in Repositories of {Web} Application Models", journal = j-TWEB, volume = "8", number = "2", pages = "11:1--11:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579991", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Model-driven engineering relies on collections of models, which are the primary artifacts for software development. 
To enable knowledge sharing and reuse, models need to be managed within repositories, where they can be retrieved upon users' queries. This article examines two different techniques for indexing and searching model repositories, with a focus on Web development projects encoded in a domain-specific language. Keyword-based and content-based search (also known as query-by-example) are contrasted with respect to the architecture of the system, the processing of models and queries, and the way in which metamodel knowledge can be exploited to improve search. A thorough experimental evaluation is conducted to examine what parameter configurations lead to better accuracy and to offer an insight in what queries are addressed best by each system.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bellogin:2014:NSW, author = "Alejandro Bellog{\'\i}n and Pablo Castells and Iv{\'a}n Cantador", title = "Neighbor Selection and Weighting in User-Based Collaborative Filtering: a Performance Prediction Approach", journal = j-TWEB, volume = "8", number = "2", pages = "12:1--12:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579993", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User-based collaborative filtering systems suggest interesting items to a user relying on similar-minded people called neighbors. The selection and weighting of these neighbors characterize the different recommendation approaches. While standard strategies perform a neighbor selection based on user similarities, trust-aware recommendation algorithms rely on other aspects indicative of user trust and reliability. 
In this article we restate the trust-aware recommendation problem, generalizing it in terms of performance prediction techniques, whose goal is to predict the performance of an information retrieval system in response to a particular query. We investigate how to adopt the preceding generalization to define a unified framework where we conduct an objective analysis of the effectiveness (predictive power) of neighbor scoring functions. The proposed framework enables discriminating whether recommendation performance improvements are caused by the used neighbor scoring functions or by the ways these functions are used in the recommendation computation. We evaluated our approach with several state-of-the-art and novel neighbor scoring functions on three publicly available datasets. By empirically comparing four neighbor quality metrics and thirteen performance predictors, we found strong predictive power for some of the predictors with respect to certain metrics. This result was then validated by checking the final performance of recommendation strategies where predictors are used for selecting and/or weighting user neighbors. 
As a result, we have found that, by measuring the predictive power of neighbor performance predictors, we are able to anticipate which predictors are going to perform better in neighbor-scoring-powered versions of a user-based collaborative filtering algorithm.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Qian:2014:FTD, author = "Yi Qian and Sibel Adali", title = "Foundations of Trust and Distrust in Networks: Extended Structural Balance Theory", journal = j-TWEB, volume = "8", number = "3", pages = "13:1--13:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628438", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Modeling trust in very large social networks is a hard problem due to the highly noisy nature of these networks that span trust relationships from many different contexts, based on judgments of reliability, dependability, and competence. Furthermore, relationships in these networks vary in their level of strength. In this article, we introduce a novel extension of structural balance theory as a foundational theory of trust and distrust in networks. Our theory preserves the distinctions between trust and distrust as suggested in the literature, but also incorporates the notion of relationship strength that can be expressed as either discrete categorical values, as pairwise comparisons, or as metric distances. Our model is novel, has sound social and psychological basis, and captures the classical balance theory as a special case. 
We then propose a convergence model, describing how an imbalanced network evolves towards new balance, and formulate the convergence problem of a social network as a Metric Multidimensional Scaling (MDS) optimization problem. Finally, we show how the convergence model can be used to predict edge signs in social networks and justify our theory through extensive experiments on real datasets.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Soi:2014:CDC, author = "Stefano Soi and Florian Daniel and Fabio Casati", title = "Conceptual Development of Custom, Domain-Specific Mashup Platforms", journal = j-TWEB, volume = "8", number = "3", pages = "14:1--14:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628439", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Despite the common claim by mashup platforms that they enable end-users to develop their own software, in practice end-users still don't develop their own mashups, as the highly technical or inexistent [sic] user bases of today's mashup platforms testify. The key shortcoming of current platforms is their general-purpose nature, that privileges expressive power over intuitiveness. In our prior work, we have demonstrated that a domain-specific mashup approach, which privileges intuitiveness over expressive power, has much more potential to enable end-user development (EUD). The problem is that developing mashup platforms-domain-specific or not-is complex and time consuming. In addition, domain-specific mashup platforms by their very nature target only a small user basis, that is, the experts of the target domain, which makes their development not sustainable if it is not adequately supported and automated. 
With this article, we aim to make the development of custom, domain-specific mashup platforms cost-effective. We describe a mashup tool development kit (MDK) that is able to automatically generate a mashup platform (comprising custom mashup and component description languages and design-time and runtime environments) from a conceptual design and to provision it as a service. We equip the kit with a dedicated development methodology and demonstrate the applicability and viability of the approach with the help of two case studies.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2014:PBT, author = "Xianchao Zhang and You Wang and Nan Mou and Wenxin Liang", title = "Propagating Both Trust and Distrust with Target Differentiation for Combating Link-Based {Web} Spam", journal = j-TWEB, volume = "8", number = "3", pages = "15:1--15:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628440", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Semi-automatic anti-spam algorithms propagate either trust through links from a good seed set (e.g., TrustRank) or distrust through inverse links from a bad seed set (e.g., Anti-TrustRank) to the entire Web. These kinds of algorithms have shown their powers in combating link-based Web spam since they integrate both human judgement and machine intelligence. Nevertheless, there is still much space for improvement. One issue of most existing trust/distrust propagation algorithms is that only trust or distrust is propagated and only a good seed set or a bad seed set is used. According to Wu et al. 
[2006a], a combined usage of both trust and distrust propagation can lead to better results, and an effective framework is needed to realize this insight. Another more serious issue of existing algorithms is that trust or distrust is propagated in nondifferential ways, that is, a page propagates its trust or distrust score uniformly to its neighbors, without considering whether each neighbor should be trusted or distrusted. Such kinds of blind propagating schemes are inconsistent with the original intention of trust/distrust propagation. However, it seems impossible to implement differential propagation if only trust or distrust is propagated. In this article, we take the view that each Web page has both a trustworthy side and an untrustworthy side, and we thusly assign two scores to each Web page: T-Rank, scoring the trustworthiness of the page, and D-Rank, scoring the untrustworthiness of the page. We then propose an integrated framework that propagates both trust and distrust. In the framework, the propagation of T-Rank/D-Rank is penalized by the target's current D-Rank/T-Rank. In other words, the propagation of T-Rank/D-Rank is decided by the target's current (generalized) probability of being trustworthy/untrustworthy; thus a page propagates more trust/distrust to a trustworthy/untrustworthy neighbor than to an untrustworthy/trustworthy neighbor. In this way, propagating both trust and distrust with target differentiation is implemented. We use T-Rank scores to realize spam demotion and D-Rank scores to accomplish spam detection. The proposed Trust-DistrustRank (TDR) algorithm regresses to TrustRank and Anti-TrustRank when the penalty factor is set to 1 and 0, respectively. Thus TDR could be seen as a combinatorial generalization of both TrustRank and Anti-TrustRank. TDR not only makes full use of both trust and distrust propagation, but also overcomes the disadvantages of both TrustRank and Anti-TrustRank. 
Experimental results on benchmark datasets show that TDR outperforms other semi-automatic anti-spam algorithms for both spam demotion and spam detection tasks under various criteria.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Margaritis:2014:ITI, author = "Giorgos Margaritis and Stergios V. Anastasiadis", title = "Incremental Text Indexing for Fast Disk-Based Search", journal = j-TWEB, volume = "8", number = "3", pages = "16:1--16:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560800", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Real-time search requires to incrementally ingest content updates and almost immediately make them searchable while serving search queries at low latency. This is currently feasible for datasets of moderate size by fully maintaining the index in the main memory of multiple machines. Instead, disk-based methods for incremental index maintenance substantially increase search latency with the index fragmented across multiple disk locations. For the support of fast search over disk-based storage, we take a fresh look at incremental text indexing in the context of current architectural features. We introduce a greedy method called Selective Range Flush (SRF) to contiguously organize the index over disk blocks and dynamically update it at low cost. We show that SRF requires substantial experimental effort to tune specific parameters for performance efficiency. Subsequently, we propose the Unified Range Flush (URF) method, which is conceptually simpler than SRF, achieves similar or better performance with fewer parameters and less tuning, and is amenable to I/O complexity analysis. 
We implement interesting variations of the two methods in the Proteus prototype search engine that we developed and do extensive experiments with three different Web datasets of size up to 1TB. Across different systems, we show that our methods offer search latency that matches or reduces up to half the lowest achieved by existing disk-based methods. In comparison to an existing method of comparable search latency on the same system, our methods reduce by a factor of 2.0--2.4 the I/O part of build time and by 21--24\% the total build time.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Siersdorfer:2014:AMC, author = "Stefan Siersdorfer and Sergiu Chelaru and Jose {San Pedro} and Ismail Sengor Altingovde and Wolfgang Nejdl", title = "Analyzing and Mining Comments and Comment Ratings on the Social {Web}", journal = j-TWEB, volume = "8", number = "3", pages = "17:1--17:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628441", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "An analysis of the social video sharing platform YouTube and the news aggregator Yahoo! News reveals the presence of vast amounts of community feedback through comments for published videos and news stories, as well as through metaratings for these comments. This article presents an in-depth study of commenting and comment rating behavior on a sample of more than 10 million user comments on YouTube and Yahoo! News. In this study, comment ratings are considered first-class citizens. 
Their dependencies with textual content, thread structure of comments, and associated content (e.g., videos and their metadata) are analyzed to obtain a comprehensive understanding of the community commenting behavior. Furthermore, this article explores the applicability of machine learning and data mining to detect acceptance of comments by the community, comments likely to trigger discussions, controversial and polarizing content, and users exhibiting offensive commenting behavior. Results from this study have potential application in guiding the design of community-oriented online discussion platforms.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Casteleyn:2014:TYR, author = "Sven Casteleyn and Irene Garrig{\'o}s and Jose-Norberto Maz{\'o}n", title = "Ten Years of {Rich Internet Applications}: a Systematic Mapping Study, and Beyond", journal = j-TWEB, volume = "8", number = "3", pages = "18:1--18:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2626369", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The term Rich Internet Applications (RIAs) is generally associated with Web applications that provide the features and functionality of traditional desktop applications. Ten years after the introduction of the term, an ample amount of research has been carried out to study various aspects of RIAs. It has thus become essential to summarize this research and provide an adequate overview. OBJECTIVE. The objective of our study is to assemble, classify, and analyze all RIA research performed in the scientific community, thus providing a consolidated overview thereof, and to identify well-established topics, trends, and open research issues. 
Additionally, we provide a qualitative discussion of the most interesting findings. This work therefore serves as a reference work for beginning and established RIA researchers alike, as well as for industrial actors that need an introduction in the field, or seek pointers to (a specific subset of) the state-of-the-art. METHOD. A systematic mapping study is performed in order to identify all RIA-related publications, define a classification scheme, and categorize, analyze, and discuss the identified research according to it. RESULTS. Our source identification phase resulted in 133 relevant, peer-reviewed publications, published between 2002 and 2011 in a wide variety of venues. They were subsequently classified according to four facets: development activity, research topic, contribution type, and research type. Pie, stacked bar, and bubble charts were used to depict and analyze the results. A deeper analysis is provided for the most interesting and/or remarkable results. CONCLUSION. Analysis of the results shows that, although the RIA term was coined in 2002, the first RIA-related research appeared in 2004. From 2007 there was a significant increase in research activity, peaking in 2009 and decreasing to pre-2009 levels afterwards. All development phases are covered in the identified research, with emphasis on ``design'' (33\%) and ``implementation'' (29\%). The majority of research proposes a ``method'' (44\%), followed by ``model'' (22\%), ``methodology'' (18\%), and ``tools'' (16\%); no publications in the category ``metrics'' were found. The preponderant research topic is ``models, methods and methodologies'' (23\%) and, to a lesser extent, ``usability and accessibility'' and ``user interface'' (11\% each). 
On the other hand, the topic ``localization, internationalization and multilinguality'' received no attention at all, and topics such as ``deep Web'' (under 1\%), ``business processing'', ``usage analysis'', ``data management'', ``quality and metrics'' (all under 2\%), ``semantics'', and ``performance'' (slightly above 2\%) received very little attention. Finally, there is a large majority of ``solution proposals'' (66\%), few ``evaluation research'' (14\%), and even fewer ``validation'' (6\%), although the latter have been increasing in recent years.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dincturk:2014:MBA, author = "Mustafa Emre Dincturk and Guy-Vincent Jourdan and Gregor V. Bochmann and Iosif Viorel Onut", title = "A Model-Based Approach for Crawling {Rich Internet Applications}", journal = j-TWEB, volume = "8", number = "3", pages = "19:1--19:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2626371", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "New Web technologies, like AJAX, result in more responsive and interactive Web applications, sometimes called Rich Internet Applications (RIAs). Crawling techniques developed for traditional Web applications are not sufficient for crawling RIAs. The inability to crawl RIAs is a problem that needs to be addressed for at least making RIAs searchable and testable. We present a new methodology, called ``model-based crawling'', that can be used as a basis to design efficient crawling strategies for RIAs. We illustrate model-based crawling with a sample strategy, called the ``hypercube strategy''. 
The performances of our model-based crawling strategies are compared against existing standard crawling strategies, including breadth-first, depth-first, and a greedy strategy. Experimental results show that our model-based crawling approach is significantly more efficient than these standard strategies.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dragut:2014:MQR, author = "Eduard C. Dragut and Bhaskar Dasgupta and Brian P. Beirne and Ali Neyestani and Badr Atassi and Clement Yu and Weiyi Meng", title = "Merging Query Results From Local Search Engines for Georeferenced Objects", journal = j-TWEB, volume = "8", number = "4", pages = "20:1--20:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2656344", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The emergence of numerous online sources about local services presents a need for more automatic yet accurate data integration techniques. Local services are georeferenced objects and can be queried by their locations on a map, for instance, neighborhoods. Typical local service queries (e.g., ``French Restaurant in The Loop'') include not only information about ``what'' (``French Restaurant'') a user is searching for (such as cuisine) but also ``where'' information, such as neighborhood (``The Loop''). In this article, we address three key problems: query translation, result merging and ranking. Most local search engines provide a (hierarchical) organization of (large) cities into neighborhoods. A neighborhood in one local search engine may correspond to sets of neighborhoods in other local search engines. These make the query translation challenging. 
To provide an integrated access to the query results returned by the local search engines, we need to combine the results into a single list of results. Our contributions include: (1) An integration algorithm for neighborhoods. (2) A very effective business listing resolution algorithm. (3) A ranking algorithm that takes into consideration the user criteria, user ratings and rankings. We have created a prototype system, Yumi, over local search engines in the restaurant domain. The restaurant domain is a representative case study for the local services. We conducted a comprehensive experimental study to evaluate Yumi. A prototype version of Yumi is available online.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chen:2014:CCU, author = "Xihui Chen and Jun Pang and Ran Xue", title = "Constructing and Comparing User Mobility Profiles", journal = j-TWEB, volume = "8", number = "4", pages = "21:1--21:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2637483", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Nowadays, the accumulation of people's whereabouts due to location-based applications has made it possible to construct their mobility profiles. This access to users' mobility profiles subsequently brings benefits back to location-based applications. For instance, in on-line social networks, friends can be recommended not only based on the similarity between their registered information, for instance, hobbies and professions but also referring to the similarity between their mobility profiles. In this article, we propose a new approach to construct and compare users' mobility profiles. 
First, we improve and apply frequent sequential pattern mining technologies to extract the sequences of places that a user frequently visits and use them to model his mobility profile. Second, we present a new method to calculate the similarity between two users using their mobility profiles. More specifically, we identify the weaknesses of a similarity metric in the literature, and propose a new one which not only fixes the weaknesses but also provides more precise and effective similarity estimation. Third, we consider the semantics of spatio-temporal information contained in user mobility profiles and add them into the calculation of user similarity. It enables us to measure users' similarity from different perspectives. Two specific types of semantics are explored in this article: location semantics and temporal semantics. Last, we validate our approach by applying it to two real-life datasets collected by Microsoft Research Asia and Yonsei University, respectively. The results show that our approach outperforms the existing works from several aspects.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vural:2014:SFW, author = "A. Gural Vural and B. Barla Cambazoglu and Pinar Karagoz", title = "Sentiment-Focused {Web} Crawling", journal = j-TWEB, volume = "8", number = "4", pages = "22:1--22:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2644821", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Sentiments and opinions expressed in Web pages towards objects, entities, and products constitute an important portion of the textual content available in the Web. 
In the last decade, the analysis of such content has gained importance due to its high potential for monetization. Despite the vast interest in sentiment analysis, somewhat surprisingly, the discovery of sentimental or opinionated Web content is mostly ignored. This work aims to fill this gap and addresses the problem of quickly discovering and fetching the sentimental content present in the Web. To this end, we design a sentiment-focused Web crawling framework. In particular, we propose different sentiment-focused Web crawling strategies that prioritize discovered URLs based on their predicted sentiment scores. Through simulations, these strategies are shown to achieve considerable performance improvement over general-purpose Web crawling strategies in discovery of sentimental Web content.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kyusakov:2014:EFE, author = "Rumen Kyusakov and Pablo Pu{\~n}al Pereira and Jens Eliasson and Jerker Delsing", title = "{EXIP}: a Framework for Embedded {Web} Development", journal = j-TWEB, volume = "8", number = "4", pages = "23:1--23:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2665068", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Developing and deploying Web applications on networked embedded devices is often seen as a way to reduce the development cost and time to market for new target platforms. However, the size of the messages and the processing requirements of today's Web protocols, such as HTTP and XML, are challenging for the most resource-constrained class of devices that could also benefit from Web connectivity. 
New Web protocols using binary representations have been proposed for addressing this issue. Constrained Application Protocol (CoAP) reduces the bandwidth and processing requirements compared to HTTP while preserving the core concepts of the Web architecture. Similarly, Efficient XML Interchange (EXI) format has been standardized for reducing the size and processing time for XML structured information. Nevertheless, the adoption of these technologies is lagging behind due to lack of support from Web browsers and current Web development toolkits. Motivated by these problems, this article presents the design and implementation techniques for the EXIP framework for embedded Web development. The framework consists of a highly efficient EXI processor, a tool for EXI data binding based on templates, and a CoAP/EXI/XHTML Web page engine. A prototype implementation of the EXI processor is herein presented and evaluated. It can be applied to Web browsers or thin server platforms using XHTML and Web services for supporting human-machine interactions in the Internet of Things. 
This article contains four major results: (1) theoretical and practical evaluation of the use of binary protocols for embedded Web programming; (2) a novel method for generation of EXI grammars based on XML Schema definitions; (3) an algorithm for grammar concatenation that produces normalized EXI grammars directly, and hence reduces the number of iterations during grammar generation; (4) an algorithm for efficient representation of possible deviations from the XML schema.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Thomas:2014:UID, author = "Paul Thomas", title = "Using Interaction Data to Explain Difficulty Navigating Online", journal = j-TWEB, volume = "8", number = "4", pages = "24:1--24:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2656343", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A user's behaviour when browsing a Web site contains clues to that user's experience. It is possible to record some of these behaviours automatically, and extract signals that indicate a user is having trouble finding information. This allows for Web site analytics based on user experiences, not just page impressions. A series of experiments identified user browsing behaviours-such as time taken and amount of scrolling up a page-which predict navigation difficulty and which can be recorded with minimal or no changes to existing sites or browsers. In turn, patterns of page views correlate with these signals and these patterns can help Web authors understand where and why their sites are hard to navigate. 
A new software tool, ``LATTE,'' automates this analysis and makes it available to Web authors in the context of the site itself.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{White:2014:CBO, author = "Ryen W. White and Ahmed Hassan", title = "Content Bias in Online Health Search", journal = j-TWEB, volume = "8", number = "4", pages = "25:1--25:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2663355", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Search engines help people answer consequential questions. Biases in retrieved and indexed content (e.g., skew toward erroneous outcomes that represent deviations from reality), coupled with searchers' biases in how they examine and interpret search results, can lead people to incorrect answers. In this article, we seek to better understand biases in search and retrieval, and in particular those affecting the accuracy of content in search results, including the search engine index, features used for ranking, and the formulation of search queries. Focusing on the important domain of online health search, this research broadens previous work on biases in search to examine the role of search systems in contributing to biases. To assess bias, we focus on questions about medical interventions and employ reliable ground truth data from authoritative medical sources. In the course of our study, we utilize large-scale log analysis using data from a popular Web search engine, deep probes of result lists on that search engine, and crowdsourced human judgments of search result captions and landing pages. 
Our findings reveal bias in results, amplifying searchers' existing biases that appear evident in their search activity. We also highlight significant bias in indexed content and show that specific ranking signals and specific query terms support bias. Both of these can degrade result accuracy and increase skewness in search results. Our analysis has implications for bias mitigation strategies in online search systems, and we offer recommendations for search providers based on our findings.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fletcher:2015:EPN, author = "Kenneth K. Fletcher and Xiaoqing F. Liu and Mingdong Tang", title = "Elastic Personalized Nonfunctional Attribute Preference and Trade-off Based Service Selection", journal = j-TWEB, volume = "9", number = "1", pages = "1:1--1:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697389", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "For service users to get the best service that meet their requirements, they prefer to personalize their nonfunctional attributes, such as reliability and price. However, the personalization makes it challenging for service providers to completely meet users' preferences, because they have to deal with conflicting nonfunctional attributes when selecting services for users. With this in mind, users may sometimes want to explicitly specify their trade-offs among nonfunctional attributes to make their preferences known to service providers. In this article, we present a novel service selection method based on fuzzy logic that considers users' personalized preferences and their trade-offs on nonfunctional attributes during service selection. 
The method allows users to represent their elastic nonfunctional requirements and associated importance using linguistic terms to specify their personalized trade-off strategies. We present examples showing how the service selection framework is used and a prototype with real-world airline services to evaluate the proposed framework's application.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2015:REA, author = "Haibin Zhang and Yan Wang and Xiuzhen Zhang and Ee-Peng Lim", title = "{ReputationPro}: The Efficient Approaches to Contextual Transaction Trust Computation in {E}-Commerce Environments", journal = j-TWEB, volume = "9", number = "1", pages = "2:1--2:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697390", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In e-commerce environments, the trustworthiness of a seller is utterly important to potential buyers, especially when a seller is not known to them. Most existing trust evaluation models compute a single value to reflect the general trustworthiness of a seller without taking any transaction context information into account. With such a result as the indication of reputation, a buyer may be easily deceived by a malicious seller in a transaction where the notorious value imbalance problem is involved-in other words, a malicious seller accumulates a high-level reputation by selling cheap products and then deceives buyers by inducing them to purchase more expensive products. In this article, we first present a trust vector consisting of three values for contextual transaction trust (CTT). 
In the computation of CTT values, three identified important context dimensions, including Product Category, Transaction Amount, and Transaction Time, are taken into account. In the meantime, the computation of each CTT value is based on both past transactions and the forthcoming transaction. In particular, with different parameters specified by a buyer regarding context dimensions, different sets of CTT values can be calculated. As a result, all of these trust values can outline the reputation profile of a seller that indicates the dynamic trustworthiness of a seller in different products, product categories, price ranges, time periods, and any necessary combination of them. We name this new model ReputationPro. Nevertheless, in ReputationPro, the computation of reputation profile requires new data structures for appropriately indexing the precomputation of aggregates over large-scale ratings and transaction data in three context dimensions, as well as novel algorithms for promptly answering buyers' CTT queries. In addition, storing precomputed aggregation results consumes a large volume of space, particularly for a system with millions of sellers. Therefore, reducing storage space for aggregation results is also a great demand. To solve these challenging problems, we first propose a new index scheme CMK-tree by extending the two-dimensional K-D-B-tree that indexes spatial data to support efficient computation of CTT values. Then, we further extend the CMK-tree and propose a CMK-tree$^{RS}$approach to reducing the storage space allocated to each seller. The two approaches are not only applicable to three context dimensions that are either linear or hierarchical but also take into account the characteristics of the transaction-time model-that is, transaction data is inserted in chronological order. Moreover, the proposed data structures can index each specific product traded in a time period to compute the trustworthiness of a seller in selling a product. 
Finally, the experimental results illustrate that the CMK-tree is superior in efficiency of computing CTT values to all three existing approaches in the literature. In particular, while answering a buyer's CTT queries for each brand-based product category, the CMK-tree has almost linear query performance. In addition, with significantly reduced storage space, the CMK-tree$^{RS}$approach can further improve the efficiency in computing CTT values. Therefore, our proposed ReputationPro model is scalable to large-scale e-commerce Web sites in terms of efficiency and storage space consumption.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cai:2015:ALW, author = "Wenbin Cai and Muhan Zhang and Ya Zhang", title = "Active Learning for {Web} Search Ranking via Noise Injection", journal = j-TWEB, volume = "9", number = "1", pages = "3:1--3:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697391", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Learning to rank has become increasingly important for many information retrieval applications. To reduce the labeling cost at training data preparation, many active sampling algorithms have been proposed. In this article, we propose a novel active learning-for-ranking strategy called ranking-based sensitivity sampling (RSS), which is tailored for Gradient Boosting Decision Tree (GBDT), a machine-learned ranking method widely used in practice by major commercial search engines for ranking. We leverage the property of GBDT that samples close to the decision boundary tend to be sensitive to perturbations and design the active learning strategy accordingly. 
We further theoretically analyze the proposed strategy by exploring the connection between the sensitivity used for sample selection and model regularization to provide a potentially theoretical guarantee w.r.t. the generalization capability. Considering that the performance metrics of ranking overweight the top-ranked items, item rank is incorporated into the selection function. In addition, we generalize the proposed technique to several other base learners to show its potential applicability in a wide variety of applications. Substantial experimental results on both the benchmark dataset and a real-world dataset have demonstrated that our proposed active learning strategy is highly effective in selecting the most informative examples.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gill:2015:CWC, author = "Phillipa Gill and Masashi Crete-Nishihata and Jakub Dalek and Sharon Goldberg and Adam Senft and Greg Wiseman", title = "Characterizing {Web} Censorship Worldwide: Another Look at the {OpenNet} Initiative Data", journal = j-TWEB, volume = "9", number = "1", pages = "4:1--4:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700339", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this study, we take another look at 5 years of web censorship data gathered by the OpenNet Initiative in 77 countries using user-based testing with locally relevant content. Prior to our work, this data had been analyzed with little automation, focusing on what content had been blocked, rather than how blocking was carried out. In this study, we use more rigorous automation to obtain a longitudinal, global view of the technical means used for web censorship. 
We also identify blocking that had been missed in prior analyses. Our results point to considerable variability in the technologies used for web censorship, across countries, time, and types of content, and even across ISPs in the same country. In addition to characterizing web censorship in countries that, thus far, have eluded technical analysis, we also discuss the implications of our observations on the design of future network measurement platforms and circumvention technologies.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fionda:2015:NFL, author = "Valeria Fionda and Giuseppe Pirr{\o} and Claudio Gutierrez", title = "{NautiLOD}: a Formal Language for the {Web of Data} Graph", journal = j-TWEB, volume = "9", number = "1", pages = "5:1--5:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697393", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web of Linked Data is a huge graph of distributed and interlinked datasources fueled by structured information. This new environment calls for formal languages and tools to automatize navigation across datasources (nodes in such graph) and enable semantic-aware and Web-scale search mechanisms. In this article we introduce a declarative navigational language for the Web of Linked Data graph called NautiLOD. NautiLOD enables one to specify datasources via the intertwining of navigation and querying capabilities. It also features a mechanism to specify actions (e.g., send notification messages) that obtain their parameters from datasources reached during the navigation. We provide a formalization of the NautiLOD semantics, which captures both nodes and fragments of the Web of Linked Data. 
We present algorithms to implement such semantics and study their computational complexity. We discuss an implementation of the features of NautiLOD in a tool called swget, which exploits current Web technologies and protocols. We report on the evaluation of swget and its comparison with related work. Finally, we show the usefulness of capturing Web fragments by providing examples in different knowledge domains.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anonymous:2015:E, author = "Anonymous", title = "Editorial", journal = j-TWEB, volume = "9", number = "2", pages = "6:1--6:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2755995", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tranquillini:2015:MEI, author = "Stefano Tranquillini and Florian Daniel and Pavel Kucherbaev and Fabio Casati", title = "Modeling, Enacting, and Integrating Custom Crowdsourcing Processes", journal = j-TWEB, volume = "9", number = "2", pages = "7:1--7:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746353", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Crowdsourcing (CS) is the outsourcing of a unit of work to a crowd of people via an open call for contributions. 
Thanks to the availability of online CS platforms, such as Amazon Mechanical Turk or CrowdFlower, the practice has experienced a tremendous growth over the past few years and demonstrated its viability in a variety of fields, such as data collection and analysis or human computation. Yet it is also increasingly struggling with the inherent limitations of these platforms: each platform has its own logic of how to crowdsource work (e.g., marketplace or contest), there is only very little support for structured work (work that requires the coordination of multiple tasks), and it is hard to integrate crowdsourced tasks into state-of-the-art business process management (BPM) or information systems. We attack these three shortcomings by (1) developing a flexible CS platform (we call it Crowd Computer, or CC) that allows one to program custom CS logics for individual and structured tasks, (2) devising a BPMN--based modeling language that allows one to program CC intuitively, (3) equipping the language with a dedicated visual editor, and (4) implementing CC on top of standard BPM technology that can easily be integrated into existing software and processes. 
We demonstrate the effectiveness of the approach with a case study on the crowd-based mining of mashup model patterns.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Diaz:2015:AWR, author = "Oscar D{\'\i}az and Crist{\'o}bal Arellano", title = "The Augmented {Web}: Rationales, Opportunities, and Challenges on Browser-Side Transcoding", journal = j-TWEB, volume = "9", number = "2", pages = "8:1--8:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2735633", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Today's web personalization technologies use approaches like user categorization, configuration, and customization but do not fully support individualized requirements. As a significant portion of our social and working interactions are migrating to the web, we can expect an increase in these kinds of minority requirements. Browser-side transcoding holds the promise of facilitating this aim by opening personalization to third parties through web augmentation (WA), realized in terms of extensions and userscripts. WA is to the web what augmented reality is to the physical world: to layer relevant content/layout/navigation over the existing web to improve the user experience. From this perspective, WA is not as powerful as web personalization since its scope is limited to the surface of the web. However, it permits this surface to be tuned by developers other than the sites' webmasters. This opens up the web to third parties who might come up with imaginative ways of adapting the web surface for their own purposes. Its success is backed up by millions of downloads. 
This work looks at this phenomenon, delving into the ``what,'' the ``why,'' and the ``what for'' of WA, and surveys the challenges ahead for WA to thrive. To this end, we appraise the most downloaded 45 WA extensions for Mozilla Firefox and Google Chrome as well as conduct a systematic literature review to identify what quality issues received the most attention in the literature. The aim is to raise awareness about WA as a key enabler of the personal web and point out research directions.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sun:2015:ITB, author = "Chang-Ai Sun and Xin Zhang and Yan Shang and Marco Aiello", title = "Integrating Transactions into {BPEL} Service Compositions: an Aspect-Based Approach", journal = j-TWEB, volume = "9", number = "2", pages = "9:1--9:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2757288", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The concept of software as a service has been increasingly adopted to develop distributed applications. Ensuring the reliability of loosely coupled compositions is a challenging task because of the open, dynamic, and independent nature of composable services; this is especially true when the execution of a service-based process relies on independent but correlated services. Transactions are the prototypical case of compositions spanning across multiple services and needing properties to be valid throughout the whole execution. Although transaction protocols and service composition languages have been proposed in the past decade, a true viable and effective solution is still missing. 
In this article, we propose a systematic aspect-based approach to integrating transactions into service compositions, taking into account the well-known protocols: Web Service Transaction and Business Process Execution Language (BPEL). In our approach, transaction policies are first defined as a set of aspects. They are then converted to standard BPEL elements. Finally, these transaction-related elements and the original BPEL process are weaved together, resulting in a transactional executable BPEL process. At runtime, transaction management is the responsibility of a middleware, which implements the coordination framework and transaction protocols followed by the transactional BPEL process and transaction-aware Web services. To automate the proposed approach, we developed a supporting platform called Salan to aid the tasks of defining, validating, and weaving aspect-based transaction policies, and of deploying the transactional BPEL processes. By means of a case study, we demonstrate the proposed approach and evaluate the performance of the supporting platform. 
Experimental results show that this approach is effective in producing reliable business processes while reducing the need for direct human involvement.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kwasnikowska:2015:FAO, author = "Natalia Kwasnikowska and Luc Moreau and Jan {Van Den Bussche}", title = "A Formal Account of the Open Provenance Model", journal = j-TWEB, volume = "9", number = "2", pages = "10:1--10:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2734116", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "On the Web, where resources such as documents and data are published, shared, transformed, and republished, provenance is a crucial piece of metadata that would allow users to place their trust in the resources they access. The open provenance model (OPM) is a community data model for provenance that is designed to facilitate the meaningful interchange of provenance information between systems. Underpinning OPM is a notion of directed graph, where nodes represent data products and processes involved in past computations and edges represent dependencies between them; it is complemented by graphical inference rules allowing new dependencies to be derived. Until now, however, the OPM model was a purely syntactical endeavor. The present article extends OPM graphs with an explicit distinction between precise and imprecise edges. Then a formal semantics for the thus enriched OPM graphs is proposed, by viewing OPM graphs as temporal theories on the temporal events represented in the graph. The original OPM inference rules are scrutinized in view of the semantics and found to be sound but incomplete. 
An extended set of graphical rules is provided and proved to be complete for inference. The article concludes with applications of the formal semantics to inferencing in OPM graphs, operators on OPM graphs, and a formal notion of refinement among OPM graphs.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cappiello:2015:UCA, author = "Cinzia Cappiello and Maristella Matera and Matteo Picozzi", title = "A {UI}-Centric Approach for the End-User Development of Multidevice Mashups", journal = j-TWEB, volume = "9", number = "3", pages = "11:1--11:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2735632", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent years, models, composition paradigms, and tools for mashup development have been proposed to support the integration of information sources, services and APIs available on the Web. The challenge is to provide a gate to a ``programmable Web,'' where end users are allowed to construct easily composite applications that merge content and functions so as to satisfy the long tail of their specific needs. The approaches proposed so far do not fully accommodate this vision. This article, therefore, proposes a mashup development framework that is oriented toward the End-User Development. Given the fundamental role of user interfaces (UIs) as a medium easily understandable by the end users, the proposed approach is characterized by UI-centric models able to support a WYSIWYG (What You See Is What You Get) specification of data integration and service orchestration. 
It, therefore, contributes to the definition of adequate abstractions that, by hiding the technology and implementation complexity, can be adopted by the end users in a kind of ``democratic'' paradigm for mashup development. This article also shows how model-to-code generative techniques translate models into application schemas, which in turn guide the dynamic instantiation of the composite applications at runtime. This is achieved through lightweight execution environments that can be deployed on the Web and on mobile devices to support the pervasive use of the created applications.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zafar:2015:SCO, author = "Muhammad Bilal Zafar and Parantapa Bhattacharya and Niloy Ganguly and Krishna P. Gummadi and Saptarshi Ghosh", title = "Sampling Content from Online Social Networks: Comparing Random vs. Expert Sampling of the {Twitter} Stream", journal = j-TWEB, volume = "9", number = "3", pages = "12:1--12:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2743023", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Analysis of content streams gathered from social networking sites such as Twitter has several applications ranging from content search and recommendation, news detection to business analytics. However, processing large amounts of data generated on these sites in real-time poses a difficult challenge. To cope with the data deluge, analytics companies and researchers are increasingly resorting to sampling. In this article, we investigate the crucial question of how to sample content streams generated by users in online social networks. The traditional method is to randomly sample all the data. 
For example, most studies using Twitter data today rely on the 1\% and 10\% randomly sampled streams of tweets that are provided by Twitter. In this paper, we analyze a different sampling methodology, one where content is gathered only from a relatively small sample ($< 1\%$) of the user population, namely, the expert users. Over the duration of a month, we gathered tweets from over 500,000 Twitter users who are identified as experts on a diverse set of topics, and compared the resulting expert sampled tweets with the 1\% randomly sampled tweets provided publicly by Twitter. We compared the sampled datasets along several dimensions, including the popularity, topical diversity, trustworthiness, and timeliness of the information contained within them, and on the sentiment/opinion expressed on specific topics. Our analysis reveals several important differences in data obtained through the different sampling methodologies, which have serious implications for applications such as topical search, trustworthy content recommendations, breaking news detection, and opinion mining.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2015:SWU, author = "Yazhe Wang and Jamie Callan and Baihua Zheng", title = "Should We Use the Sample? {Analyzing} Datasets Sampled from {Twitter}'s Stream {API}", journal = j-TWEB, volume = "9", number = "3", pages = "13:1--13:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746366", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Researchers have begun studying content obtained from microblogging services such as Twitter to address a variety of technological, social, and commercial research questions. 
The large number of Twitter users and even larger volume of tweets often make it impractical to collect and maintain a complete record of activity; therefore, most research and some commercial software applications rely on samples, often relatively small samples, of Twitter data. For the most part, sample sizes have been based on availability and practical considerations. Relatively little attention has been paid to how well these samples represent the underlying stream of Twitter data. To fill this gap, this article performs a comparative analysis on samples obtained from two of Twitter's streaming APIs with a more complete Twitter dataset to gain an in-depth understanding of the nature of Twitter data samples and their potential for use in various data mining tasks.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2015:RRT, author = "Zhiyuan Su and Ling Liu and Mingchu Li and Xinxin Fan and Yang Zhou", title = "Reliable and Resilient Trust Management in Distributed Service Provision Networks", journal = j-TWEB, volume = "9", number = "3", pages = "14:1--14:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2754934", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Distributed service networks are popular platforms for service providers to offer services to consumers and for service consumers to acquire services from unknown parties. eBay and Amazon are two well-known examples of enabling and hosting such service networks to connect service providers to service consumers. Trust management is a critical component for scaling such distributed service networks to a large and growing number of participants. 
In this article, we present ServiceTrust$^{++}$, a feedback quality--sensitive and attack resilient trust management scheme for empowering distributed service networks with effective trust management capability. Compared with existing trust models, ServiceTrust$^{++}$has several novel features. First, we present six attack models to capture both independent and colluding attacks with malicious cliques, malicious spies, and malicious camouflages. Second, we aggregate the feedback ratings based on the variances of participants' feedback behaviors and incorporate feedback similarity as weight into the local trust algorithm. Third, we compute the global trust of a participant by employing conditional trust propagation based on the feedback similarity threshold. This allows ServiceTrust$^{++}$to control and prevent malicious spies and malicious camouflage peers from boosting their global trust scores by manipulating the feedback ratings of good peers and by taking advantage of the uniform trust propagation. Finally, we systematically combine a trust-decaying strategy with a threshold value--based conditional trust propagation to further strengthen the robustness of our global trust computation against sophisticated malicious feedback. 
Experimental evaluation with both simulation-based networks and real network dataset Epinion show that ServiceTrust$^{++}$is highly resilient against all six attack models and highly effective compared to EigenTrust, the most popular and representative trust propagation model to date.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Calzavara:2015:SLA, author = "Stefano Calzavara and Gabriele Tolomei and Andrea Casini and Michele Bugliesi and Salvatore Orlando", title = "A Supervised Learning Approach to Protect Client Authentication on the {Web}", journal = j-TWEB, volume = "9", number = "3", pages = "15:1--15:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2754933", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Browser-based defenses have recently been advocated as an effective mechanism to protect potentially insecure web applications against the threats of session hijacking, fixation, and related attacks. In existing approaches, all such defenses ultimately rely on client-side heuristics to automatically detect cookies containing session information, to then protect them against theft or otherwise unintended use. While clearly crucial to the effectiveness of the resulting defense mechanisms, these heuristics have not, as yet, undergone any rigorous assessment of their adequacy. In this article, we conduct the first such formal assessment, based on a ground truth of 2,464 cookies we collect from 215 popular websites of the Alexa ranking. To obtain the ground truth, we devise a semiautomatic procedure that draws on the novel notion of authentication token, which we introduce to capture multiple web authentication schemes. 
We test existing browser-based defenses in the literature against our ground truth, unveiling several pitfalls both in the heuristics adopted and in the methods used to assess them. We then propose a new detection method based on supervised learning, where our ground truth is used to train a set of binary classifiers, and report on experimental evidence that our method outperforms existing proposals. Interestingly, the resulting classifiers, together with our hands-on experience in the construction of the ground truth, provide new insight on how web authentication is actually implemented in practice.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lee:2015:DPM, author = "Sihyung Lee", title = "Detection of Political Manipulation in Online Communities through Measures of Effort and Collaboration", journal = j-TWEB, volume = "9", number = "3", pages = "16:1--16:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767134", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online social media allow users to interact with one another by sharing opinions, and these opinions have a critical impact on the way readers think and behave. Accordingly, an increasing number of \emph{manipulators} deliberately spread messages to influence the public, often in an organized manner. In particular, political manipulation --- manipulation of opponents to win political advantage --- can result in serious consequences: antigovernment riots can break out, leading to candidates' defeat in an election. 
A few approaches have been proposed to detect such manipulation based on the level of social interaction (i.e., manipulators actively post opinions but infrequently befriend and reply to other users). However, several studies have shown that the interactions can be forged at a low cost and thus may not be effective measures of manipulation. To go one step further, we collect a dataset for real, large-scale political manipulation, which consists of opinions found on Internet forums. These opinions are divided into manipulators and nonmanipulators. Using this collection, we demonstrate that manipulators inevitably work hard, in teams, to quickly influence a large audience. With this in mind, it could be said that a high level of collaborative efforts strongly indicates manipulation. For example, a group of manipulators may jointly post numerous opinions with a consistent theme and selectively recommend the same, well-organized opinion to promote its rank. We show that the effort measures, when combined with a supervised learning algorithm, successfully identify greater than 95\% of the manipulators. We believe that the proposed method will help system administrators to accurately detect manipulators in disguise, significantly decreasing the intensity of manipulation.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gollapalli:2015:IRH, author = "Sujatha Das Gollapalli and Cornelia Caragea and Prasenjit Mitra and C. 
Lee Giles", title = "Improving Researcher Homepage Classification with Unlabeled Data", journal = j-TWEB, volume = "9", number = "4", pages = "17:1--17:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767135", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/hash.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A classifier that determines if a webpage is relevant to a specified set of topics comprises a key component for focused crawling. Can a classifier that is tuned to perform well on training datasets continue to filter out irrelevant pages in the face of changing content on the Web? We investigate this question in the context of identifying researcher homepages. We show experimentally that classifiers trained on existing datasets of academic homepages underperform on ``non-homepages'' present on current-day academic websites. As an alternative to obtaining labeled datasets to retrain classifiers for the new content, in this article we ask the following question: ``How can we effectively use the unlabeled data readily available from academic websites to improve researcher homepage classification?'' We design novel URL-based features and use them in conjunction with content-based features for representing homepages. Within the co-training framework, these sets of features can be treated as complementary views enabling us to effectively use unlabeled data and obtain remarkable improvements in homepage identification on the current-day academic websites. We also propose a novel technique for learning a ``conforming pair of classifiers'' that mimics co-training. Our algorithm seeks to minimize a loss (objective) function quantifying the difference in predictions from the two views afforded by co-training. 
We argue that this loss formulation provides insights for understanding co-training and can be used even in the absence of a validation dataset. Our next set of findings pertains to the evaluation of other state-of-the-art techniques for classifying homepages. First, we apply feature selection (FS) and feature hashing (FH) techniques independently and in conjunction with co-training to academic homepages. FS is a well-known technique for removing redundant and unnecessary features from the data representation, whereas FH is a technique that uses hash functions for efficient encoding of features. We show that FS can be effectively combined with co-training to obtain further improvements in identifying homepages. However, using hashed feature representations, a performance degradation is observed possibly due to feature collisions. Finally, we evaluate other semisupervised algorithms for homepage classification. We show that although several algorithms are effective in using information from the unlabeled instances, co-training that explicitly harnesses the feature split in the underlying instances outperforms approaches that combine content and URL features into a single view.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2015:DCU, author = "Jing Wang and Clement T. Yu and Philip S. Yu and Bing Liu and Weiyi Meng", title = "Diversionary Comments under Blog Posts", journal = j-TWEB, volume = "9", number = "4", pages = "18:1--18:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2789211", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "There has been a recent swell of interest in the analysis of blog comments. 
However, much of the work focuses on detecting comment spam in the blogsphere. An important issue that has been neglected so far is the identification of diversionary comments. Diversionary comments are defined as comments that divert the topic from the original post. A possible purpose is to distract readers from the original topic and draw attention to a new topic. We categorize diversionary comments into five types based on our observations and propose an effective framework to identify and flag them. To the best of our knowledge, the problem of detecting diversionary comments has not been studied so far. We solve the problem in two different ways: (i) rank all comments in descending order of being diversionary and (ii) consider it as a classification problem. Our evaluation on 4,179 comments under 40 different blog posts from Digg and Reddit shows that the proposed method achieves the high mean average precision of 91.9\% when the problem is considered as a ranking problem and 84.9\% of F-measure as a classification problem. Sensitivity analysis indicates that the effectiveness of the method is stable under different parameter settings.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Katzir:2015:ECC, author = "Liran Katzir and Stephen J. Hardiman", title = "Estimating Clustering Coefficients and Size of Social Networks via Random Walk", journal = j-TWEB, volume = "9", number = "4", pages = "19:1--19:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2790304", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This work addresses the problem of estimating social network measures. 
Specifically, the measures at hand are the network average and global clustering coefficients and the number of registered users. The algorithms at hand (1) assume no prior knowledge about the network and (2) access the network using only the publicly available interface. More precisely, this work provides (a) a unified approach for clustering coefficients estimation and (b) a new network size estimator. The unified approach for the clustering coefficients yields the first external access algorithm for estimating the global clustering coefficient. The new network size estimator offers improved accuracy compared to prior art estimators. Our approach is to view a social network as an undirected graph and use the public interface to retrieve a random walk. To estimate the clustering coefficient, the connectivity of each node in the random walk sequence is tested in turn. We show that the error drops exponentially in the number of random walk steps. For the network size estimation we offer a generalized view of prior art estimators that in turn yields an improved estimator. All algorithms are validated on several publicly available social network datasets.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Watanabe:2015:FQM, author = "Willian Massami Watanabe and Ana Luiza Dias and Renata Pontin {De Mattos Fortes}", title = "{Fona}: Quantitative Metric to Measure Focus Navigation on Rich {Internet} Applications", journal = j-TWEB, volume = "9", number = "4", pages = "20:1--20:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2812812", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web 2.0 brought new requirements to the architecture of web systems. 
Web applications' interfaces are becoming more and more interactive. However, these changes are severely impacting how disabled users interact through assistive technologies with the web. In order to deploy an accessible web application, developers can use WAI-ARIA to design an accessible web application, which manually implements focus and keyboard navigation mechanisms. This article presents a quantitative metric, named Fona, which measures how the Focus Navigation WAI-ARIA requirement has been implemented on the web. Fona counts JavaScript mouse event listeners, HTML elements with role attributes, and TabIndex attributes in the DOM structure of webpages. Fona's evaluation approach provides a narrow analysis of one single accessibility requirement. But it enables monitoring this accessibility requirement in a large number of webpages. This monitoring activity might be used to give insights about how Focus Navigation and ARIA requirements have been considered by web development teams. Fona is validated comparing the results of a set of WAI-ARIA conformant implementations and a set of webpages formed by Alexa's 349 top most popular websites. 
The analysis of Fona's value for Alexa's websites highlights that many websites still lack the implementation of Focus Navigation through their JavaScript interactive content.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Marszalkowski:2016:ASC, author = "Jakub Marszalkowski and Jan Mizgajski and Dariusz Mokwa and Maciej Drozdowski", title = "Analysis and Solution of {CSS}-Sprite Packing Problem", journal = j-TWEB, volume = "10", number = "1", pages = "1:1--1:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818377", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A CSS-sprite packing problem is considered in this article. CSS-sprite is a technique of combining many pictures of a web page into one image for the purpose of reducing network transfer time. The CSS-sprite packing problem is formulated here as an optimization challenge. The significance of geometric packing, image compression and communication performance is discussed. A mathematical model for constructing multiple sprites and optimization of load time is proposed. The impact of PNG-sprite aspect ratio on file size is studied experimentally. Benchmarking of real user web browsers communication performance covers latency, bandwidth, number of concurrent channels as well as speedup from parallel download. Existing software for building CSS-sprites is reviewed. A novel method, called Spritepack, is proposed and evaluated. 
Spritepack outperforms current software.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Amor:2016:DBT, author = "Iheb Ben Amor and Salima Benbernou and Mourad Ouziri and Zaki Malik and Brahim Medjahed", title = "Discovering Best Teams for Data Leak-Aware Crowdsourcing in Social Networks", journal = j-TWEB, volume = "10", number = "1", pages = "2:1--2:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2814573", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Crowdsourcing is emerging as a powerful paradigm to help perform a wide range of tedious tasks in various enterprise applications. As such applications become more complex, crowdsourcing systems often require the collaboration of several experts connected through professional/social networks and organized in various teams. For instance, a well-known car manufacturer asked fans to contribute ideas for the kinds of technologies that should be incorporated into one of its cars. For that purpose, fans needed to collaborate and form teams competing with each others to come up with the best ideas. However, once teams are formed, each one would like to provide the best solution and treat that solution as a ``trade secret,'' hence preventing any data leak to its competitors (i.e., the other teams). In this article, we propose a data leak--aware crowdsourcing system called SocialCrowd. We introduce a clustering algorithm that uses social relationships between crowd workers to discover all possible teams while avoiding interteam data leakage. We also define a ranking mechanism to select the ``best'' team configurations. 
Our mechanism is based on the semiring approach defined in the area of soft constraints programming. Finally, we present experiments to assess the efficiency of the proposed approach.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Song:2016:IJV, author = "Hengjie Song and Yonghui Xu and Huaqing Min and Qingyao Wu and Wei Wei and Jianshu Weng and Xiaogang Han and Qiang Yang and Jialiang Shi and Jiaqian Gu and Chunyan Miao and Nishida Toyoaki", title = "Individual Judgments Versus Consensus: Estimating Query-{URL} Relevance", journal = j-TWEB, volume = "10", number = "1", pages = "3:1--3:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2834122", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Query-URL relevance, measuring the relevance of each retrieved URL with respect to a given query, is one of the fundamental criteria to evaluate the performance of commercial search engines. The traditional way to collect reliable and accurate query-URL relevance requires multiple annotators to provide their individual judgments based on their subjective expertise (e.g., understanding of user intents). In this case, the annotators' subjectivity reflected in each annotator individual judgment (AIJ) inevitably affects the quality of the ground truth relevance (GTR). But to the best of our knowledge, the potential impact of AIJs on estimating GTRs has not been studied and exploited quantitatively by existing work. This article first studies how multiple AIJs and GTRs are correlated. Our empirical studies find that the multiple AIJs possibly provide more cues to improve the accuracy of estimating GTRs. 
Inspired by this finding, we then propose a novel approach to integrating the multiple AIJs with the features characterizing query-URL pairs for estimating GTRs more accurately. Furthermore, we conduct experiments in a commercial search engine-Baidu.com-and report significant gains in terms of the normalized discounted cumulative gains.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2016:DSP, author = "Xianchao Zhang and Zhaoxing Li and Shaoping Zhu and Wenxin Liang", title = "Detecting Spam and Promoting Campaigns in {Twitter}", journal = j-TWEB, volume = "10", number = "1", pages = "4:1--4:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846102", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Twitter has become a target platform for both promoters and spammers to disseminate their messages, which are more harmful than traditional spamming methods, such as email spamming. Recently, large amounts of campaigns that contain lots of spam or promotion accounts have emerged in Twitter. The campaigns cooperatively post unwanted information, and thus they can infect more normal users than individual spam or promotion accounts. Organizing or participating in campaigns has become the main technique to spread spam or promotion information in Twitter. Since traditional solutions focus on checking individual accounts or messages, efficient techniques for detecting spam and promotion campaigns in Twitter are urgently needed. In this article, we propose a framework to detect both spam and promotion campaigns. 
Our framework consists of three steps: the first step links accounts who post URLs for similar purposes; the second step extracts candidate campaigns that may be for spam or promotion purposes; and the third step classifies the candidate campaigns into normal, spam, and promotion groups. The key point of the framework is how to measure the similarity between accounts' purposes of posting URLs. We present two measure methods based on Shannon information theory: the first one uses the URLs posted by the users, and the second one considers both URLs and timestamps. Experimental results demonstrate that the proposed methods can extract the majority of the candidate campaigns correctly, and detect promotion and spam campaigns with high precision and recall.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Eshuis:2016:FCE, author = "Rik Eshuis and Freddy L{\'e}cu{\'e} and Nikolay Mehandjiev", title = "Flexible Construction of Executable Service Compositions from Reusable Semantic Knowledge", journal = j-TWEB, volume = "10", number = "1", pages = "5:1--5:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2842628", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Most service composition approaches rely on top-down decomposition of a problem and AI-style planning to assemble service components into a meaningful whole, impeding reuse and flexibility. In this article, we propose an approach that starts from declarative knowledge about the semantics of individual service components and algorithmically constructs a full-blown service orchestration process that supports sequence, choice, and parallelism. 
The output of our algorithm can be mapped directly into a number of service orchestration languages such as OWL-S and BPEL. The approach consists of two steps. First, semantic links specifying data dependencies among the services are derived and organized in a flexible network. Second, based on a user request indicating the desired outcomes from the composition, an executable composition is constructed from the network that satisfies the dependencies. The approach is unique in producing complex compositions out of semantic links between services in a flexible way. It also allows reusing knowledge about semantic dependencies in the network to generate new compositions through new requests and modification of services at runtime. The approach has been implemented in a prototype that outperforms related composition prototypes in experiments.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Avila:2016:WTC, author = "Bruno T. {\'A}vila and Rafael D. Lins", title = "W-tree: a Compact External Memory Representation for Webgraphs", journal = j-TWEB, volume = "10", number = "1", pages = "6:1--6:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2835181", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "World Wide Web applications need to use, constantly update, and maintain large webgraphs for executing several tasks, such as calculating the web impact factor, finding hubs and authorities, performing link analysis by webometrics tools, and ranking webpages by web search engines. Such webgraphs need to use a large amount of main memory, and, frequently, they do not completely fit in, even if compressed. Therefore, applications require the use of external memory. 
This article presents a new compact representation for webgraphs, called w-tree, which is designed specifically for external memory. It supports the execution of basic queries (e.g., full read, random read, and batch random read), set-oriented queries (e.g., superset, subset, equality, overlap, range, inlink, and co-inlink), and some advanced queries, such as edge reciprocal and hub and authority. Furthermore, a new layout tree designed specifically for webgraphs is also proposed, reducing the overall storage cost and allowing the random read query to be performed with an asymptotically faster runtime in the worst case. To validate the advantages of the w-tree, a series of experiments are performed to assess an implementation of the w-tree comparing it to a compact main memory representation. The results obtained show that w-tree is competitive in compression time and rate and in query time, which may execute several orders of magnitude faster for set-oriented queries than its competitors. The results provide empirical evidence that it is feasible to use a compact external memory representation for webgraphs in real applications, contradicting the previous assumptions made by several researchers.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2016:STQ, author = "Xinyu Wang and Jianke Zhu and Zibin Zheng and Wenjie Song and Yuanhong Shen and Michael R. 
Lyu", title = "A Spatial-Temporal {QoS} Prediction Approach for Time-aware {Web} Service Recommendation", journal = j-TWEB, volume = "10", number = "1", pages = "7:1--7:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801164", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Due to the popularity of service-oriented architectures for various distributed systems, an increasing number of Web services have been deployed all over the world. Recently, Web service recommendation became a hot research topic, one that aims to accurately predict the quality of functional satisfactory services for each end user. Generally, the performance of Web service changes over time due to variations of service status and network conditions. Instead of employing the conventional temporal models, we propose a novel spatial-temporal QoS prediction approach for time-aware Web service recommendation, where a sparse representation is employed to model QoS variations. Specifically, we make a zero-mean Laplace prior distribution assumption on the residuals of the QoS prediction, which corresponds to a Lasso regression problem. To effectively select the nearest neighbor for the sparse representation of temporal QoS values, the geo-location of web service is employed to reduce searching range while improving prediction accuracy. 
The extensive experimental results demonstrate that the proposed approach outperforms state-of-art methods with more than 10\% improvement on the accuracy of temporal QoS prediction for time-aware Web service recommendation.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Guo:2016:NEB, author = "Guibing Guo and Jie Zhang and Neil Yorke-Smith", title = "A Novel Evidence-Based {Bayesian} Similarity Measure for Recommender Systems", journal = j-TWEB, volume = "10", number = "2", pages = "8:1--8:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856037", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User-based collaborative filtering, a widely used nearest neighbour-based recommendation technique, predicts an item's rating by aggregating its ratings from similar users. User similarity is traditionally calculated by cosine similarity or the Pearson correlation coefficient. However, both of these measures consider only the direction of rating vectors, and suffer from a range of drawbacks. To overcome these issues, we propose a novel Bayesian similarity measure based on the Dirichlet distribution, taking into consideration both the direction and length of rating vectors. We posit that not all the rating pairs should be equally counted in order to accurately model user correlation. Three different evidence factors are designed to compute the weights of rating pairs. Further, our principled method reduces correlation due to chance and potential system bias. 
Experimental results on six real-world datasets show that our method achieves superior accuracy in comparison with counterparts.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Frattolillo:2016:BFM, author = "Franco Frattolillo", title = "A Buyer-Friendly and Mediated Watermarking Protocol for {Web} Context", journal = j-TWEB, volume = "10", number = "2", pages = "9:1--9:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856036", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Watermarking protocols are used in conjunction with digital watermarking techniques to protect digital copyright on the Internet. They define the schemes of the web transactions by which buyers can purchase protected digital content distributed by content providers in a secure manner. Over the last few years, significant examples of watermarking protocols have been proposed in literature. However, a detailed examination of such protocols has revealed a number of problems that have to be addressed in order to make them suited for current web context. 
Therefore, based on the most relevant problems derived from literature, this article identifies the main challenges posed by the development of watermarking protocols for web context and presents a watermarking protocol that follows a new secure, buyer-centric and mediated design approach able to meet such challenges.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2016:QDQ, author = "Wensheng Wu and Weiyi Meng and Weifeng Su and Guangyou Zhou and Yao-Yi Chiang", title = "{Q2P}: Discovering Query Templates via Autocompletion", journal = j-TWEB, volume = "10", number = "2", pages = "10:1--10:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873061", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We present Q2P, a system that discovers query templates from search engines via their query autocompletion services. Q2P is distinct from the existing works in that it does not rely on query logs of search engines that are typically not readily available. Q2P is also unique in that it uses a trie to economically store queries sampled from a search engine and employs a beam-search strategy that focuses the expansion of the trie on its most promising nodes. Furthermore, Q2P leverages the trie-based storage of query sample to discover query templates using only two passes over the trie. Q2P is a key part of our ongoing project Deep2Q on a template-driven data integration on the Deep Web, where the templates learned by Q2P are used to guide the integration process in Deep2Q. 
Experimental results on four major search engines indicate that (1) Q2P sends only a moderate number of queries (ranging from 597 to 1,135) to the engines, while obtaining a significant number of completions per query (ranging from 4.2 to 8.5 on the average); (2) a significant number of templates (ranging from 8 to 32 when the minimum support for frequent templates is set to 1\%) may be discovered from the samples.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Walk:2016:ADC, author = "Simon Walk and Denis Helic and Florian Geigl and Markus Strohmaier", title = "Activity Dynamics in Collaboration Networks", journal = j-TWEB, volume = "10", number = "2", pages = "11:1--11:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873060", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Many online collaboration networks struggle to gain user activity and become self-sustaining due to the ramp-up problem or dwindling activity within the system. Prominent examples include online encyclopedias such as (Semantic) MediaWikis, Question and Answering portals such as StackOverflow, and many others. Only a small fraction of these systems manage to reach self-sustaining activity, a level of activity that prevents the system from reverting to a nonactive state. In this article, we model and analyze activity dynamics in synthetic and empirical collaboration networks. Our approach is based on two opposing and well-studied principles: (i) without incentives, users tend to lose interest to contribute and thus, systems become inactive, and (ii) people are susceptible to actions taken by their peers (social or peer influence). 
With the activity dynamics model that we introduce in this article we can represent typical situations of such collaboration networks. For example, activity in a collaborative network, without external impulses or investments, will vanish over time, eventually rendering the system inactive. However, by appropriately manipulating the activity dynamics and/or the underlying collaboration networks, we can jump-start a previously inactive system and advance it toward an active state. To be able to do so, we first describe our model and its underlying mechanisms. We then provide illustrative examples of empirical datasets and characterize the barrier that has to be breached by a system before it can become self-sustaining in terms of critical mass and activity dynamics. Additionally, we expand on this empirical illustration and introduce a new metric --- the Activity Momentum --- to assess the activity robustness of collaboration networks.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zheng:2016:PQA, author = "Huiyuan Zheng and Jian Yang and Weiliang Zhao", title = "Probabilistic {QoS} Aggregations for Service Composition", journal = j-TWEB, volume = "10", number = "2", pages = "12:1--12:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2876513", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this article, we propose a comprehensive approach for Quality of Service (QoS) calculation in service composition. 
Differing from the existing work on QoS aggregations that represent QoS as single values, discrete values with frequencies, or standard statistical distributions, the proposed approach has the capability to handle any type of QoS probability distribution. A set of formulae and algorithms are developed to calculate the QoS of a composite service according to four identified basic patterns as sequential, parallel, conditional, and loop. We demonstrate that the proposed QoS calculation method is much more efficient than existing simulation methods. It has a high scalability and builds a solid foundation for real-time QoS analysis and prediction in service composition. Experiment results are provided to show the effectiveness and efficiency of the proposed method.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Paul:2016:SBC, author = "Michael J. Paul and Ryen W. White and Eric Horvitz", title = "Search and Breast Cancer: On Episodic Shifts of Attention over Life Histories of an Illness", journal = j-TWEB, volume = "10", number = "2", pages = "13:1--13:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2893481", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We seek to understand the evolving needs of people who are faced with a life-changing medical diagnosis based on analyses of queries extracted from an anonymized search query log. Focusing on breast cancer, we manually tag a set of Web searchers as showing patterns of search behavior consistent with someone grappling with the screening, diagnosis, and treatment of breast cancer. 
We build and apply probabilistic classifiers to detect these searchers from multiple sessions and to identify the timing of diagnosis using temporal and statistical features. We explore the changes in information seeking over time before and after an inferred diagnosis of breast cancer by aligning multiple searchers by the estimated time of diagnosis. We employ the classifier to automatically identify 1,700 candidate searchers with an estimated 90\% precision, and we predict the day of diagnosis within 15 days with an 88\% accuracy. We show that the geographic and demographic attributes of searchers identified with high probability are strongly correlated with ground truth of reported incidence rates. We then analyze the content of queries over time for inferred cancer patients, using a detailed ontology of cancer-related search terms. The analysis reveals the rich temporal structure of the evolving queries of people likely diagnosed with breast cancer. Finally, we focus on subtypes of illness based on inferred stages of cancer and show clinically relevant dynamics of information seeking based on the dominant stage expressed by searchers.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Doerfel:2016:WUA, author = "Stephan Doerfel and Daniel Zoller and Philipp Singer and Thomas Niebler and Andreas Hotho and Markus Strohmaier", title = "What Users Actually Do in a Social Tagging System: a Study of User Behavior in {BibSonomy}", journal = j-TWEB, volume = "10", number = "2", pages = "14:1--14:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2896821", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social tagging systems have established themselves as an 
important part in today's Web and have attracted the interest of our research community in a variety of investigations. Henceforth, several aspects of social tagging systems have been discussed and assumptions have emerged on which our community builds their work. Yet, testing such assumptions has been difficult due to the absence of suitable usage data in the past. In this work, we thoroughly investigate and evaluate four aspects about tagging systems, covering social interaction, retrieval of posted resources, the importance of the three different types of entities, users, resources, and tags, as well as connections between these entities' popularity in posted and in requested content. For that purpose, we examine live server log data gathered from the real-world, public social tagging system BibSonomy. Our empirical results paint a mixed picture about the four aspects. Although typical assumptions hold to a certain extent for some, other aspects need to be reflected in a very critical light. Our observations have implications for the understanding of social tagging systems and the way they are used on the Web. 
We make the dataset used in this work available to other researchers.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Naini:2016:SEW, author = "Kaweh Djafari Naini and Ismail Sengor Altingovde and Wolf Siberski", title = "Scalable and Efficient {Web} Search Result Diversification", journal = j-TWEB, volume = "10", number = "3", pages = "15:1--15:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2907948", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It has been shown that top-$k$ retrieval quality can be considerably improved by taking not only relevance but also diversity into account. However, currently proposed diversification approaches have not put much attention on practical usability in large-scale settings, such as modern web search systems. In this work, we make two contributions toward this goal. First, we propose a combination of optimizations and heuristics for an implicit diversification algorithm based on the desirable facility placement principle, and present two algorithms that achieve linear complexity without compromising the retrieval effectiveness. Instead of an exhaustive comparison of documents, these algorithms first perform a clustering phase and then exploit its outcome to compose the diverse result set. Second, we describe and analyze two variants for distributed diversification in a computing cluster, for large-scale IR where the document collection is too large to keep in one node. Our contribution in this direction is pioneering, as there exists no earlier work in the literature that investigates the effectiveness and efficiency of diversification on a distributed setup. 
Extensive evaluations on a standard TREC framework demonstrate a competitive retrieval quality of the proposed optimizations to the baseline algorithm while reducing the processing time by more than 80\% and up to 97\%, and shed light on the efficiency and effectiveness tradeoffs of diversification when applied on top of a distributed architecture.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Furche:2016:PFW, author = "Tim Furche and Giovanni Grasso and Michael Huemer and Christian Schallhart and Michael Schrefl", title = "{PeaCE-Ful} {Web} Event Extraction and Processing as Bitemporal Mutable Events", journal = j-TWEB, volume = "10", number = "3", pages = "16:1--16:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2911989", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The web is the largest bulletin board of the world. Events of all types, from flight arrivals to business meetings, are announced on this board. Tracking and reacting to such event announcements, however, is a tedious manual task, only slightly alleviated by email or similar notifications. Announcements are published with human readers in mind, and updates or delayed announcements are frequent. These characteristics have hampered attempts at automatic tracking. PeaCE provides the first integrated framework for event processing on top of web event ads, consisting of event extraction, complex event processing, and action execution in response to these events. Given a schema of the events to be tracked, the framework populates this schema by extracting events from announcement sources. 
This extraction is performed by little programs called wrappers that produce the events including updates and retractions. PeaCE then queries these events to detect complex events, often combining announcements from multiple sources. To deal with updates and delayed announcements, PeaCE's schemas are bitemporal, to distinguish between occurrence and detection time. This allows complex event specifications to track updates and to react upon differences in occurrence and detection time. In case of new, changing, or deleted events, PeaCE allows one to execute actions, such as tweeting or sending out email notifications. Actions are typically specified as web interactions, for example, to fill and submit a form with attributes of the triggering event. Our evaluation shows that PeaCE's processing is dominated by the time needed for accessing the web to extract events and perform actions, allotting to 97.4\%. Thus, PeaCE requires only 2.6\% overhead, and therefore, the complex event processor scales well even with moderate resources. 
We further show that simple and reasonable restrictions on complex event specifications and the timing of constituent events suffice to guarantee that PeaCE only requires a constant buffer to process arbitrarily many event announcements.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cranor:2016:LSE, author = "Lorrie Faith Cranor and Pedro Giovanni Leon and Blase Ur", title = "A Large-Scale Evaluation of {U.S.} Financial Institutions' Standardized Privacy Notices", journal = j-TWEB, volume = "10", number = "3", pages = "17:1--17:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2911988", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Financial institutions in the United States are required by the Gramm-Leach-Bliley Act to provide annual privacy notices. In 2009, eight federal agencies jointly released a model privacy form for these disclosures. While the use of this model privacy form is not required, it has been widely adopted. We automatically evaluated 6,191 U.S. financial institutions' privacy notices posted on the World Wide Web. We found large variance in stated practices, even among institutions of the same type. While thousands of financial institutions share personal information without providing the opportunity for consumers to opt out, some institutions' practices are more privacy protective. Regression analyses show that large institutions and those headquartered in the northeastern region share consumers' personal information at higher rates than all other institutions. 
Furthermore, our analysis helped us uncover institutions that do not let consumers limit data sharing when legally required to do so, as well as institutions making self-contradictory statements. We discuss implications for privacy in the financial industry, issues with the design and use of the model privacy form on the World Wide Web, and future directions for standardized privacy notice.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Srba:2016:CSC, author = "Ivan Srba and Maria Bielikova", title = "A Comprehensive Survey and Classification of Approaches for Community Question Answering", journal = j-TWEB, volume = "10", number = "3", pages = "18:1--18:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934687", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Community question-answering (CQA) systems, such as Yahoo! Answers or Stack Overflow, belong to a prominent group of successful and popular Web 2.0 applications, which are used every day by millions of users to find an answer on complex, subjective, or context-dependent questions. In order to obtain answers effectively, CQA systems should optimally harness collective intelligence of the whole online community, which will be impossible without appropriate collaboration support provided by information technologies. Therefore, CQA became an interesting and promising subject of research in computer science and now we can gather the results of 10 years of research. Nevertheless, in spite of the increasing number of publications emerging each year, so far the research on CQA systems has missed a comprehensive state-of-the-art survey. 
We attempt to fill this gap by a review of 265 articles published between 2005 and 2014, which were selected from major conferences and journals. According to this evaluation, at first we propose a framework that defines descriptive attributes of CQA approaches. Second, we introduce a classification of all approaches with respect to problems they are aimed to solve. The classification is consequently employed in a review of a significant number of representative approaches, which are described by means of attributes from the descriptive framework. As a part of the survey, we also depict the current trends as well as highlight the areas that require further attention from the research community.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hwang:2016:PPS, author = "Seung-Won Hwang and Saehoon Kim and Yuxiong He and Sameh Elnikety and Seungjin Choi", title = "Prediction and Predictability for Search Query Acceleration", journal = j-TWEB, volume = "10", number = "3", pages = "19:1--19:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2943784", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A commercial web search engine shards its index among many servers, and therefore the response time of a search query is dominated by the slowest server that processes the query. Prior approaches target improving responsiveness by reducing the tail latency, or high-percentile response time, of an individual search server. They predict query execution time, and if a query is predicted to be long-running, it runs in parallel; otherwise, it runs sequentially. 
These approaches are, however, not accurate enough for reducing a high tail latency when responses are aggregated from many servers because this requires each server to reduce a substantially higher tail latency (e.g., the 99.99th percentile), which we call extreme tail latency. To address tighter requirements of extreme tail latency, we propose a new design space for the problem, subsuming existing work and also proposing a new solution space. Existing work makes a prediction using features available at indexing time and focuses on optimizing prediction features for accelerating tail queries. In contrast, we identify ``when to predict?'' as another key optimization question. This opens up a new solution of delaying a prediction by a short duration to allow many short-running queries to complete without parallelization and, at the same time, to allow the predictor to collect a set of dynamic features using runtime information. This new question expands a solution space in two meaningful ways. First, we see a significant reduction of tail latency by leveraging ``dynamic'' features collected at runtime that estimate query execution time with higher accuracy. Second, we can ask whether to override prediction when the ``predictability'' is low. We show that considering predictability accelerates the query by achieving a higher recall. With this prediction, we propose to accelerate the queries that are predicted to be long-running. In our preliminary work, we focused on parallelization as an acceleration scenario. We extend to consider heterogeneous multicore hardware for acceleration. This hardware combines processor cores with different microarchitectures such as energy-efficient little cores and high-performance big cores, and accelerating web search using this hardware has remained an open problem. We evaluate the proposed prediction framework in two scenarios: (1) query parallelization on a multicore processor and (2) query scheduling on a heterogeneous processor. 
Our extensive evaluation results show that, for both scenarios of query acceleration using parallelization and heterogeneous cores, the proposed framework is effective in reducing the extreme tail latency compared to a state-of-the-art predictor because of its higher recall, and it improves server throughput by more than 70\% because of its improved precision.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Eraslan:2016:STA, author = "Sukru Eraslan and Yeliz Yesilada and Simon Harper", title = "Scanpath Trend Analysis on {Web} Pages: Clustering Eye Tracking Scanpaths", journal = j-TWEB, volume = "10", number = "4", pages = "20:1--20:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2970818", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Eye tracking studies have widely been used in improving the design and usability of web pages and in the research of understanding how users navigate them. However, there is limited research in clustering users' eye movement sequences (i.e., scanpaths) on web pages to identify a general direction they follow. Existing research tends to be reductionist, which means that the resulting path is so short that it is not useful. Moreover, there is little work on correlating users' scanpaths with visual elements of web pages and the underlying source code, which means the result cannot be used for further processing. In order to address these limitations, we introduce a new concept in clustering scanpaths called Scanpath Trend Analysis (STA) that not only considers the visual elements visited by all users, but also considers the visual elements visited by the majority in any order. 
We present an algorithm which automatically does this trend analysis to identify a trending scanpath for multiple web users in terms of visual elements of a web page. In contrast to existing research, the STA algorithm first analyzes the most visited visual elements in given scanpaths, clusters the scanpaths by arranging these visual elements based on their overall positions in the individual scanpaths, and then constructs a trending scanpath in terms of these visual elements. This algorithm was experimentally evaluated by an eye tracking study on six web pages for two different kinds of tasks (12 cases in total). Our experimental results show that the STA algorithm generates a trending scanpath that addresses the reductionist problem of existing work by preventing the loss of commonly visited visual elements for all cases. Based on the statistical tests, the STA algorithm also generates a trending scanpath that is significantly more similar to the inputted scanpaths compared to other existing work in 10 out of 12 cases. In the remaining cases, the STA algorithm still performs significantly better than some other existing work. 
This algorithm contributes to behavior analysis research on the web that can be used for different purposes: for example, re-engineering web pages guided by the trending scanpath to improve users' experience or guiding designers to improve their design.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rafalak:2016:WCC, author = "Maria Rafalak and Dominik Deja and Adam Wierzbicki and Radoslaw Nielek and Michal Kakol", title = "{Web} Content Classification Using Distributions of Subjective Quality Evaluations", journal = j-TWEB, volume = "10", number = "4", pages = "21:1--21:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2994132", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Machine learning algorithms and recommender systems trained on human ratings are widely in use today. However, human ratings may be associated with a high level of uncertainty and are subjective, influenced by demographic or psychological factors. We propose a new approach to the design of object classes from human ratings: the use of entire distributions to construct classes. By avoiding aggregation for class definition, our approach loses no information and can deal with highly volatile or conflicting ratings. The approach is based the concept of the Earth Mover's Distance (EMD), a measure of distance for distributions. We evaluate the proposed approach based on four datasets obtained from diverse Web content or movie quality evaluation services or experiments. We show that clusters discovered in these datasets using the EMD measure are characterized by a consistent and simple interpretation. 
Quality classes defined using entire rating distributions can be fitted to clusters of distributions in the four datasets using two parameters, resulting in a good overall fit. We also consider the impact of the composition of small samples on the distributions that are the basis of our classification approach. We show that using distributions based on small samples of 10 evaluations is still robust to several demographic and psychological variables. This observation suggests that the proposed approach can be used in practice for quality evaluation, even for highly uncertain and subjective ratings.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Guo:2016:FEE, author = "Guangming Guo and Feida Zhu and Enhong Chen and Qi Liu and Le Wu and Chu Guan", title = "From Footprint to Evidence: an Exploratory Study of Mining Social Data for Credit Scoring", journal = j-TWEB, volume = "10", number = "4", pages = "22:1--22:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2996465", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the booming popularity of online social networks like Twitter and Weibo, online user footprints are accumulating rapidly on the social web. Simultaneously, the question of how to leverage the large-scale user-generated social media data for personal credit scoring comes into the sight of both researchers and practitioners. It has also become a topic of great importance and growing interest in the P2P lending industry. However, compared with traditional financial data, heterogeneous social data presents both opportunities and challenges for personal credit scoring. 
In this article, we seek a deep understanding of how to learn users' credit labels from social data in a comprehensive and efficient way. Particularly, we explore the social-data-based credit scoring problem under the micro-blogging setting for its open, simple, and real-time nature. To identify credit-related evidence hidden in social data, we choose to conduct an analytical and empirical study on a large-scale dataset from Weibo, the largest and most popular tweet-style website in China. Summarizing results from existing credit scoring literature, we first propose three social-data-based credit scoring principles as guidelines for in-depth exploration. In addition, we glean six credit-related insights arising from empirical observations of the testbed dataset. Based on the proposed principles and insights, we extract prediction features mainly from three categories of users' social data, including demographics, tweets, and networks. To harness this broad range of features, we put forward a two-tier stacking and boosting enhanced ensemble learning framework. Quantitative investigation of the extracted features shows that online social media data does have good potential in discriminating good credit users from bad. Furthermore, we perform experiments on the real-world Weibo dataset consisting of more than 7.3 million tweets and 200,000 users whose credit labels are known through our third-party partner. 
Experimental results show that (i) our approach achieves a roughly 0.625 AUC value with all the proposed social features as input, and (ii) our learning algorithm can outperform traditional credit scoring methods by as much as 17\% for social-data-based personal credit scoring.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bahri:2016:CCO, author = "Leila Bahri and Barbara Carminati and Elena Ferrari", title = "{COIP}-Continuous, Operable, Impartial, and Privacy-Aware Identity Validity Estimation for {OSN} Profiles", journal = j-TWEB, volume = "10", number = "4", pages = "23:1--23:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3014338", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Identity validation of Online Social Networks' (OSNs') peers is a critical concern to the insurance of safe and secure online socializing environments. Starting from the vision of empowering users to determine the validity of OSN identities, we suggest a framework to estimate the trustworthiness of online social profiles based only on the information they contain. Our framework is based on learning identity correlations between profile attributes in an OSN community and on collecting ratings from OSN community members to evaluate the trustworthiness of target profiles. Our system guarantees utility, user anonymity, impartiality in rating, and operability within the dynamics and continuous evolution of OSNs. In this article, we detail the system design, and we prove its correctness against these claimed quality properties. 
Moreover, we test its effectiveness, feasibility, and efficiency through experimentation on real-world datasets from Facebook and Google+, in addition to using the Adults UCI dataset.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Das:2016:MAA, author = "Sanmay Das and Allen Lavoie and Malik Magdon-Ismail", title = "Manipulation among the Arbiters of Collective Intelligence: How {Wikipedia} Administrators Mold Public Opinion", journal = j-TWEB, volume = "10", number = "4", pages = "24:1--24:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3001937", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Our reliance on networked, collectively built information is a vulnerability when the quality or reliability of this information is poor. Wikipedia, one such collectively built information source, is often our first stop for information on all kinds of topics; its quality has stood up to many tests, and it prides itself on having a ``neutral point of view.'' Enforcement of neutrality is in the hands of comparatively few, powerful administrators. In this article, we document that a surprisingly large number of editors change their behavior and begin focusing more on a particular controversial topic once they are promoted to administrator status. The conscious and unconscious biases of these few, but powerful, administrators may be shaping the information on many of the most sensitive topics on Wikipedia; some may even be explicitly infiltrating the ranks of administrators in order to promote their own points of view. In addition, we ask whether administrators who change their behavior in this suspicious manner can be identified in advance. 
Neither prior history nor vote counts during an administrator's election are useful in doing so, but we find that an alternative measure, which gives more weight to influential voters, can successfully reject these suspicious candidates. This second result has important implications for how we harness collective intelligence: even if wisdom exists in a collective opinion (like a vote), that signal can be lost unless we carefully distinguish the true expert voter from the noisy or manipulative voter.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mukherjee:2017:ISV, author = "Partha Mukherjee and Bernard J. Jansen", title = "Information Sharing by Viewers Via Second Screens for In-Real-Life Events", journal = j-TWEB, volume = "11", number = "1", pages = "1:1--1:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3009970", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The use of second screen devices with social media facilitates conversational interaction concerning broadcast media events, creating what we refer to as the social soundtrack. In this research, we evaluate the change of the Super Bowl XLIX social soundtrack across three social media platforms on the topical categories of commercials, music, and game at three game phases ( Pre, During, and Post ). We perform statistical analysis on more than 3M, 800K, and 50K posts from Twitter, Instagram, and Tumblr, respectively. Findings show that the volume of posts in the During phase is fewer compared to Pre and Post phases; however, the hourly mean in the During phase is considerably higher than it is in the other two phases. 
We identify the predominant phase and category of interaction across all three social media sites. We also determine the significance of change in absolute scale across the Super Bowl categories (commercials, music, game) and in both absolute and relative scales across Super Bowl phases ( Pre, During, Post ) for the three social network platforms (Twitter, Tumblr, Instagram). Results show that significant phase-category relationships exist for all three social networks. The results identify the During phase as the predominant one for all three categories on all social media sites with respect to the absolute volume of conversations in a continuous scale. From the relative volume perspective, the During phase is highest for the music category for most social networks. For the commercials and game categories, however, the Post phase is higher than the During phase for Twitter and Instagram, respectively. Regarding category identification, the game category is the highest for Twitter and Instagram but not for Tumblr, which has dominant peaks for music and/or commercials in all three phases. It is apparent that different social media platforms offer various phase and category affordances. 
These results are important in identifying the influence that second screen technology has on information sharing across different social media platforms and indicates that the viewer role is transitioning from passive to more active.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Squicciarini:2017:TAO, author = "Anna Squicciarini and Cornelia Caragea and Rahul Balakavi", title = "Toward Automated Online Photo Privacy", journal = j-TWEB, volume = "11", number = "1", pages = "2:1--2:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983644", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online photo sharing is an increasingly popular activity for Internet users. More and more users are now constantly sharing their images in various social media, from social networking sites to online communities, blogs, and content sharing sites. In this article, we present an extensive study exploring privacy and sharing needs of users' uploaded images. We develop learning models to estimate adequate privacy settings for newly uploaded images, based on carefully selected image-specific features. Our study investigates both visual and textual features of images for privacy classification. We consider both basic image-specific features, commonly used for image processing, as well as more sophisticated and abstract visual features. Additionally, we include a visual representation of the sentiment evoked by images. To our knowledge, sentiment has never been used in the context of image classification for privacy purposes. 
We identify the smallest set of features, that by themselves or combined together with others, can perform well in properly predicting the degree of sensitivity of users' images. We consider both the case of binary privacy settings (i.e., public, private), as well as the case of more complex privacy options, characterized by multiple sharing options. Our results show that with few carefully selected features, one may achieve high accuracy, especially when high-quality tags are available.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kang:2017:EMA, author = "Jeon-Hyung Kang and Kristina Lerman", title = "Effort Mediates Access to Information in Online Social Networks", journal = j-TWEB, volume = "11", number = "1", pages = "3:1--3:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990506", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Individuals' access to information in a social network depends on how it is distributed and where in the network individuals position themselves. In addition, individuals vary in how much effort they invest in managing their social connections. Using data from a social media site, we study how the interplay between effort and network position affects social media users' access to diverse and novel information. Previous studies of the role of networks in information access were limited in their ability to measure the diversity of information. We address this problem by learning the topics of interest to social media users from the messages they share online with followers. We use the learned topics to measure the diversity of information users receive from the people they follow online. 
We confirm that users in structurally diverse network positions, which bridge otherwise disconnected regions of the follower network, tend to be exposed to more diverse and novel information. We also show that users who invest more effort in their activity on the site are not only located in more structurally diverse positions within the network than the less engaged users but also receive more novel and diverse information when in similar network positions. These findings indicate that the relationship between network structure and access to information in networks is more nuanced than previously thought.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Seneviratne:2017:SMA, author = "Suranga Seneviratne and Aruna Seneviratne and Mohamed Ali Kaafar and Anirban Mahanti and Prasant Mohapatra", title = "Spam Mobile Apps: Characteristics, Detection, and in the Wild Analysis", journal = j-TWEB, volume = "11", number = "1", pages = "4:1--4:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007901", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The increased popularity of smartphones has attracted a large number of developers to offer various applications for the different smartphone platforms via the respective app markets. One consequence of this popularity is that the app markets are also becoming populated with spam apps. These spam apps reduce the users' quality of experience and increase the workload of app market operators to identify these apps and remove them. 
Spam apps can come in many forms such as apps not having a specific functionality, those having unrelated app descriptions or unrelated keywords, or similar apps being made available several times and across diverse categories. Market operators maintain antispam policies and apps are removed through continuous monitoring. Through a systematic crawl of a popular app market and by identifying apps that were removed over a period of time, we propose a method to detect spam apps solely using app metadata available at the time of publication. We first propose a methodology to manually label a sample of removed apps, according to a set of checkpoint heuristics that reveal the reasons behind removal. This analysis suggests that approximately 35\% of the apps being removed are very likely to be spam apps. We then map the identified heuristics to several quantifiable features and show how distinguishing these features are for spam apps. We build an Adaptive Boost classifier for early identification of spam apps using only the metadata of the apps. Our classifier achieves an accuracy of over 95\% with precision varying between 85\% and 95\% and recall varying between 38\% and 98\%. We further show that a limited number of features, in the range of 10--30, generated from app metadata is sufficient to achieve a satisfactory level of performance. On a set of 180,627 apps that were present at the app market during our crawl, our classifier predicts 2.7\% of the apps as potential spam. 
Finally, we perform additional manual verification and show that human reviewers agree with 82\% of our classifier predictions.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Uribe:2017:UWP, author = "Silvia Uribe and Federico {\'A}lvarez and Jos{\'e} Manuel Men{\'e}ndez", title = "User's {Web} Page Aesthetics Opinion: a Matter of Low-Level Image Descriptors Based on {MPEG-7}", journal = j-TWEB, volume = "11", number = "1", pages = "5:1--5:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3019595", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Analyzing a user's first impression of a Web site is essential for interface designers, as it is tightly related to their overall opinion of a site. In fact, this early evaluation affects user navigation behavior. Perceived usability and user interest (e.g., revisiting and recommending the site) are parameters influenced by first opinions. Thus, predicting the latter when creating a Web site is vital to ensure users' acceptance. In this regard, Web aesthetics is one of the most influential factors in this early perception. We propose the use of low-level image parameters for modeling Web aesthetics in an objective manner, which is an innovative research field. Our model, obtained by applying a stepwise multiple regression algorithm, infers a user's first impression by analyzing three different visual characteristics of Web site screenshots---texture, luminance, and color---which are directly derived from MPEG-7 descriptors. 
The results obtained over three wide Web site datasets (composed by 415, 42, and 6 Web sites, respectively) reveal a high correlation between low-level parameters and the users' evaluation, thus allowing a more precise and objective prediction of users' opinion than previous models that are based on other image characteristics with fewer predictors. Therefore, our model is meant to support a rapid assessment of Web sites in early stages of the design process to maximize the likelihood of the users' final approval.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Parra-Arnau:2017:MBT, author = "Javier Parra-Arnau and Jagdish Prasad Achara and Claude Castelluccia", title = "{MyAdChoices}: Bringing Transparency and Control to Online Advertising", journal = j-TWEB, volume = "11", number = "1", pages = "7:1--7:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996466", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The intrusiveness and the increasing invasiveness of online advertising have, in the last few years, raised serious concerns regarding user privacy and Web usability. As a reaction to these concerns, we have witnessed the emergence of a myriad of ad-blocking and antitracking tools, whose aim is to return control to users over advertising. The problem with these technologies, however, is that they are extremely limited and radical in their approach: users can only choose either to block or allow all ads. With around 200 million people regularly using these tools, the economic model of the Web-in which users get content free in return for allowing advertisers to show them ads-is at serious peril. 
In this article, we propose a smart Web technology that aims at bringing transparency to online advertising, so that users can make an informed and equitable decision regarding ad blocking. The proposed technology is implemented as a Web-browser extension and enables users to exert fine-grained control over advertising, thus providing them with certain guarantees in terms of privacy and browsing experience, while preserving the Internet economic model. Experimental results in a real environment demonstrate the suitability and feasibility of our approach, and provide preliminary findings on behavioral targeting from real user browsing profiles.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2017:VMC, author = "Tianyi Wang and Gang Wang and Bolun Wang and Divya Sambasivan and Zengbin Zhang and Xing Li and Haitao Zheng and Ben Y. Zhao", title = "Value and Misinformation in Collaborative Investing Platforms", journal = j-TWEB, volume = "11", number = "2", pages = "8:1--8:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3027487", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It is often difficult to separate the ``highly capable experts'' from the average worker in crowdsourced systems. This is especially true for challenging application domains that require extensive domain knowledge. The problem of stock analysis is one such domain, where even the highly paid, well-educated domain experts are prone to make mistakes. As an extremely challenging problem space, the ``wisdom of the crowds'' property that many crowdsourced applications rely on may not hold. 
In this article, we study the problem of evaluating and identifying experts in the context of SeekingAlpha and StockTwits, two crowdsourced investment services that have recently begun to encroach on a space dominated for decades by large investment banks. We seek to understand the quality and impact of content on collaborative investment platforms, by empirically analyzing complete datasets of SeekingAlpha articles (9 years) and StockTwits messages (4 years). We develop sentiment analysis tools and correlate contributed content to the historical performance of relevant stocks. While SeekingAlpha articles and StockTwits messages provide minimal correlation to stock performance in aggregate, a subset of experts contribute more valuable (predictive) content. We show that these authors can be easily identified by user interactions, and investments based on their analysis significantly outperform broader markets. This effectively shows that even in challenging application domains, there is a secondary or indirect wisdom of the crowds. Finally, we conduct a user survey that sheds light on users' views of SeekingAlpha content and stock manipulation. 
We also devote efforts to identify potential manipulation of stocks by detecting authors controlling multiple identities.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Drutsa:2017:PUE, author = "Alexey Drutsa and Gleb Gusev and Pavel Serdyukov", title = "Periodicity in User Engagement with a Search Engine and Its Application to Online Controlled Experiments", journal = j-TWEB, volume = "11", number = "2", pages = "9:1--9:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2856822", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Nowadays, billions of people use the Web in connection with their daily needs. A significant part of these needs are constituted by search tasks that are usually addressed by search engines. Thus, daily search needs result in regular user engagement with a search engine. User engagement with web services was studied in various aspects, but there appears to be little work devoted to its regularity and periodicity. In this article, we study periodicity of user engagement with a popular search engine through applying spectrum analysis to temporal sequences of different engagement metrics. First, we found periodicity patterns of user engagement and revealed classes of users whose periodicity patterns do not change over a long period of time. In addition, we give an exhaustive analysis of the stability and quality of identified clusters. Second, we used the spectrum series as key metrics to evaluate search quality. 
We found that the novel periodicity metrics outperform the state-of-the-art quality metrics both in terms of significance level ( p -value) and sensitivity to a large set of large-scale A/B experiments conducted on real search engine users.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rahman:2017:AAC, author = "M. Rezaur Rahman and Jinyoung Han and Yong Jae Lee and Chen-Nee Chuah", title = "Analyzing the Adoption and Cascading Process of {OSN}-Based Gifting Applications: an Empirical Study", journal = j-TWEB, volume = "11", number = "2", pages = "10:1--10:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3023871", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "To achieve growth in the user base of online social networks--(OSN) based applications, word-of-mouth diffusion mechanisms, such as user-to-user invitations, are widely used. This article characterizes the adoption and cascading process of OSN-based applications that grow via user invitations. We analyze a detailed large-scale dataset of a popular Facebook gifting application, iHeart, that contains more than 2 billion entries of user activities generated by 190 million users during a span of 64 weeks. We investigate (1) how users invite their friends to an OSN-based application, (2) how application adoption of an individual user can be predicted, (3) what factors drive the cascading process of application adoptions, and (4) what are the good predictors of the ultimate cascade sizes. We find that sending or receiving a large number of invitations does not necessarily help to recruit new users to iHeart. 
We also find that the average success ratio of inviters is the most important feature in predicting an adoption of an individual user, which indicates that the effectiveness of inviters has strong predictive power with respect to application adoption. Based on the lessons learned from our analyses, we build and evaluate learning-based models to predict whether a user will adopt iHeart. Our proposed model that utilizes additional activity information of individual users from other similar types of gifting applications can achieve high precision (83\%) in predicting adoptions in the target application (i.e., iHeart). We next identify a set of distinctive features that are good predictors of the growth of the application adoptions in terms of final population size. We finally propose a prediction model to infer whether a cascade of application adoption will continue to grow in the future based on observing the initial adoption process. Results show that our proposed model can achieve high precision (over 80\%) in predicting large cascades of application adoptions. We believe our work can give an important implication in resource allocation of OSN-based product stakeholders, for example, via targeted marketing.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Matsubara:2017:NDI, author = "Yasuko Matsubara and Yasushi Sakurai and B. 
Aditya Prakash and Lei Li and Christos Faloutsos", title = "Nonlinear Dynamics of Information Diffusion in Social Networks", journal = j-TWEB, volume = "11", number = "2", pages = "11:1--11:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057741", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/string-matching.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The recent explosion in the adoption of search engines and new media such as blogs and Twitter have facilitated the faster propagation of news and rumors. How quickly does a piece of news spread over these media? How does its popularity diminish over time? Does the rising and falling pattern follow a simple universal law? In this article, we propose SpikeM, a concise yet flexible analytical model of the rise and fall patterns of information diffusion. Our model has the following advantages. First, unification power: it explains earlier empirical observations and generalizes theoretical models including the SI and SIR models. We provide the threshold of the take-off versus die-out conditions for SpikeM and discuss the generality of our model by applying it to an arbitrary graph topology. Second, practicality: it matches the observed behavior of diverse sets of real data. Third, parsimony: it requires only a handful of parameters. Fourth, usefulness: it makes it possible to perform analytic tasks such as forecasting, spotting anomalies, and interpretation by reverse engineering the system parameters of interest (quality of news, number of interested bloggers, etc.). We also introduce an efficient and effective algorithm for the real-time monitoring of information diffusion, namely SpikeStream, which identifies multiple diffusion patterns in a large collection of online event streams. 
Extensive experiments on real datasets demonstrate that SpikeM accurately and succinctly describes all patterns of the rise and fall spikes in social networks.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rojas-Galeano:2017:OOO, author = "Sergio Rojas-Galeano", title = "On Obstructing Obscenity Obfuscation", journal = j-TWEB, volume = "11", number = "2", pages = "12:1--12:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032963", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Obscenity (the use of rude words or offensive expressions) has spread from informal verbal conversations to digital media, becoming increasingly common on user-generated comments found in Web forums, newspaper user boards, social networks, blogs, and media-sharing sites. The basic obscenity-blocking mechanism is based on verbatim comparisons against a blacklist of banned vocabulary; however, creative users circumvent these filters by obfuscating obscenity with symbol substitutions or bogus segmentations that still visually preserve the original semantics, such as writing shit as {\em \$h!t\/} or {\em s.h.i.t\/} or
even worse mixing them as {\em \.h....!.t\/}. The number of potential obfuscated variants is combinatorial, yielding the verbatim filter impractical. Here we describe a method intended to obstruct this anomaly inspired by sequence alignment algorithms used in genomics, coupled with a tailor-made edit penalty function. The method only requires to set up the vocabulary of plain obscenities; no further training is needed. Its complexity on screening a single obscenity is linear, both in runtime and memory, on the length of the user-generated text. We validated the method on three different experiments. The first one involves a new dataset that is also introduced in this article; it consists of a set of manually annotated real-life comments in Spanish, gathered from the news user boards of an online newspaper, containing this type of obfuscation. The second one is a publicly available dataset of comments in Portuguese from a sports Web site. In these experiments, at the obscenity level, we observed recall rates greater than 90\%, whereas precision rates varied between 75\% and 95\%, depending on their sequence length (shorter lengths yielded a higher number of false alarms). On the other hand, at the comment level, we report recall of 86\%, precision of 91\%, and specificity of 98\%. The last experiment revealed that the method is more effective in matching this type of obfuscation compared to the classical Levenshtein edit distance. 
We conclude discussing the prospects of the method to help enforcing moderation rules of obscenity expressions or as a preprocessing mechanism for sequence cleaning and/or feature extraction in more sophisticated text categorization techniques.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Xu:2017:EIE, author = "Haitao Xu and Daiping Liu and Haining Wang and Angelos Stavrou", title = "An Empirical Investigation of Ecommerce-Reputation-Escalation-as-a-Service", journal = j-TWEB, volume = "11", number = "2", pages = "13:1--13:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In online markets, a store's reputation is closely tied to its profitability. Sellers' desire to quickly achieve a high reputation has fueled a profitable underground business that operates as a specialized crowdsourcing marketplace and accumulates wealth by allowing online sellers to harness human laborers to conduct fake transactions to improve their stores' reputations. We term such an underground market a seller-reputation-escalation (SRE) market. In this article, we investigate the impact of the SRE service on reputation escalation by performing in-depth measurements of the prevalence of the SRE service, the business model and market size of SRE markets, and the characteristics of sellers and offered laborers. To this end, we have infiltrated five SRE markets and studied their operations using daily data collection over a continuous period of 2 months. We identified more than 11,000 online sellers posting at least 219,165 fake-purchase tasks on the five SRE markets. These transactions earned at least \$46,438 in revenue
for the five SRE markets, and the total value of
merchandise involved exceeded \$3,452,530. Our study demonstrates that online sellers using the SRE service can increase their stores' reputations at least 10 times faster than legitimate ones while about 25\% of them were visibly penalized. Even worse, we found a much stealthier and more hazardous service that can, within a single day, boost a seller's reputation by such a degree that would require a legitimate seller at least a year to accomplish. Armed with our analysis of the operational characteristics of the underground economy, we offer some insights into potential mitigation strategies. Finally, we revisit the SRE ecosystem 1 year later to evaluate the latest dynamism of the SRE markets, especially the statuses of the online stores once identified to launch fake-transaction campaigns on the SRE markets. We observe that the SRE markets are not as active as they were 1 year ago and about 17\% of the involved online stores become inaccessible likely because they have been forcibly shut down by the corresponding E-commerce marketplace for conducting fake transactions.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singer:2017:BMC, author = "Philipp Singer and Denis Helic and Andreas Hotho and Markus Strohmaier", title = "A {Bayesian} Method for Comparing Hypotheses About Human Trails", journal = j-TWEB, volume = "11", number = "3", pages = "14:1--14:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054950", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "When users interact with the Web today, they leave sequential digital trails on a massive scale. 
Examples of such human trails include Web navigation, sequences of online restaurant reviews, or online music play lists. Understanding the factors that drive the production of these trails can be useful, for example, for improving underlying network structures, predicting user clicks, or enhancing recommendations. In this work, we present a method called HypTrails for comparing a set of hypotheses about human trails on the Web, where hypotheses represent beliefs about transitions between states. Our method utilizes Markov chain models with Bayesian inference. The main idea is to incorporate hypotheses as informative Dirichlet priors and to calculate the evidence of the data under them. For eliciting Dirichlet priors from hypotheses, we present an adaption of the so-called (trial) roulette method, and to compare the relative plausibility of hypotheses, we employ Bayes factors. We demonstrate the general mechanics and applicability of HypTrails by performing experiments with (i) synthetic trails for which we control the mechanisms that have produced them and (ii) empirical trails stemming from different domains including Web site navigation, business reviews, and online music played. 
Our work expands the repertoire of methods available for studying human trails.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vahedian:2017:MRH, author = "Fatemeh Vahedian and Robin Burke and Bamshad Mobasher", title = "Multirelational Recommendation in Heterogeneous Networks", journal = j-TWEB, volume = "11", number = "3", pages = "15:1--15:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054952", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recommender systems are key components in information-seeking contexts where personalization is sought. However, the dominant framework for recommendation is essentially two dimensional, with the interaction between users and items characterized by a single relation. In many cases, such as social networks, users and items are joined in a complex web of relations, not readily reduced to a single value. Recent multirelational approaches to recommendation focus on the direct, proximal relations in which users and items may participate. Our approach uses the framework of complex heterogeneous networks to represent such recommendation problems. We propose the weighted hybrid of low-dimensional recommenders (WHyLDR) recommendation model, which uses extended relations, represented as constrained network paths, to effectively augment direct relations. This model incorporates influences from both distant and proximal connections in the network. The WHyLDR approach raises the problem of the unconstrained proliferation of components, built from ever-extended network paths. 
We show that although component utility is not strictly monotonic with path length, a measure based on information gain can effectively prune and optimize such hybrids.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sariyuce:2017:NDI, author = "Ahmet Erdem Sariy{\"u}ce and C. Seshadhri and Ali Pinar and {\"U}mit V. {\c{C}}ataly{\"u}rek", title = "Nucleus Decompositions for Identifying Hierarchy of Dense Subgraphs", journal = j-TWEB, volume = "11", number = "3", pages = "16:1--16:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057742", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Finding dense substructures in a graph is a fundamental graph mining operation, with applications in bioinformatics, social networks, and visualization to name a few. Yet most standard formulations of this problem (like clique, quasi-clique, densest at-least-$k$ subgraph) are NP-hard. Furthermore, the goal is rarely to find ``the true optimum'' but to identify many (if not all) dense substructures, understand their distribution in the graph, and ideally determine relationships among them. Current dense subgraph finding algorithms usually optimize some objective and only find a few such subgraphs without providing any structural relations. We define the nucleus decomposition of a graph, which represents the graph as a forest of nuclei. Each nucleus is a subgraph where smaller cliques are present in many larger cliques. The forest of nuclei is a hierarchy by containment, where the edge density increases as we proceed towards leaf nuclei. Sibling nuclei can have limited intersections, which enables discovering overlapping dense subgraphs. 
With the right parameters, the nucleus decomposition generalizes the classic notions of $k$-core and $k$-truss decompositions. We present practical algorithms for nucleus decompositions and empirically evaluate their behavior in a variety of real graphs. The tree of nuclei consistently gives a global, hierarchical snapshot of dense substructures and outputs dense subgraphs of comparable quality with the state-of-the-art solutions that are dense and have non-trivial sizes. Our algorithms can process real-world graphs with tens of millions of edges in less than an hour. We demonstrate how proposed algorithms can be utilized on a citation network. Our analysis showed that dense units identified by our algorithms correspond to coherent articles on a specific area. Our experiments also show that we can identify dense structures that are lost within larger structures by other methods and find further finer grain structure within dense groups.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kanza:2017:LBD, author = "Yaron Kanza and Elad Kravi and Eliyahu Safra and Yehoshua Sagiv", title = "Location-Based Distance Measures for Geosocial Similarity", journal = j-TWEB, volume = "11", number = "3", pages = "17:1--17:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054951", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article investigates the problem of geosocial similarity among users of online social networks, based on the locations of their activities (e.g., posting messages or photographs). 
Finding pairs of geosocially similar users or detecting that two sets of locations (of activities) belong to the same user has important applications in privacy protection, recommendation systems, urban planning, and public health, among others. It is explained and shown empirically that common distance measures between sets of locations are inadequate for determining geosocial similarity. Two novel distance measures between sets of locations are introduced. One is the mutually nearest distance that is based on computing a matching between two sets. The second measure uses a quad-tree index. It is highly scalable but incurs the overhead of creating and maintaining the index. Algorithms with optimization techniques are developed for computing the two distance measures and also for finding the $k$-most-similar users of a given one. Extensive experiments, using geotagged messages from Twitter, show that the new distance measures are both more accurate and more efficient than existing ones.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Panagopoulos:2017:MER, author = "A. Panagopoulos and E. Koutrouli and A. Tsalgatidou", title = "Modeling and Evaluating a Robust Feedback-Based Reputation System for E-Commerce Platforms", journal = j-TWEB, volume = "11", number = "3", pages = "18:1--18:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057265", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Despite the steady growth of e-commerce communities in the past two decades, little has changed in the way these communities manage reputation for building trust and for protecting their member's financial interests against fraud. 
As these communities mature and the defects of their reputation systems are revealed, further potential for deception against their members is created, that pushes the need for novel reputation mechanisms. Although a high volume of research works has explored the concepts of reputation and trust in e-communities, most of the proposed reputation systems target decentralized e-communities, focusing on issues related with the decentralized reputation management; they have not thus been integrated in e-commerce platforms. This work's objective is to provide an attack-resilient feedback-based reputation system for modern e-commerce platforms, while minimizing the incurred financial burden of potent security schemes. Initially, we discuss a series of attacks and issues in reputation systems and study the different approaches of these problems from related works, while also considering the structural properties, defense mechanisms and policies of existing platforms. Then we present our proposition for a robust reputation system which consists of a novel reputation metric and attack prevention mechanisms. Finally, we describe the simulation framework and tool that we have implemented for thoroughly testing and evaluating the metric's resilience against attacks and present the evaluation experiments and their results. 
We consider the presented simulation framework as the second contribution of our article, aiming at facilitating the simulation and elaborate evaluation of reputation systems which specifically target e-commerce platforms by thoroughly presenting it, exhibiting its usage and making it available to the research community.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bianchini:2017:WMD, author = "Devis Bianchini and Valeria {De Antonellis} and Michele Melchiori", title = "{WISeR}: a Multi-Dimensional Framework for Searching and Ranking {Web APIs}", journal = j-TWEB, volume = "11", number = "3", pages = "19:1--19:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3061710", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Mashups are agile applications that aggregate RESTful services, developed by third parties, whose functions are exposed as Web Application Program Interfaces (APIs) within public repositories. From mashups developers' viewpoint, Web API search may benefit from selection criteria that combine several dimensions used to describe the APIs, such as categories, tags, and technical features (e.g., protocols and data formats). Nevertheless, other dimensions might be fruitfully exploited to support Web API search. Among them, past API usage experiences by other developers may be used to suggest the right APIs for a target application. Past experiences might emerge from the co-occurrence of Web APIs in the same mashups. Ratings assigned by developers after using the Web APIs to create their own mashups or after using mashups developed by others can be considered as well. 
This article aims to advance the current state of the art for Web API search and ranking from mashups developers' point of view, by addressing two key issues: multi-dimensional modeling and multi-dimensional framework for selection. The model for Web API characterization embraces multiple descriptive dimensions, by considering several public repositories, that focus on different and only partially overlapping dimensions. The proposed Web API selection framework, called WISeR (Web apI Search and Ranking), is based on functions devoted to developers to exploit the multi-dimensional descriptions, in order to enhance the identification of candidate Web APIs to be proposed, according to the given requirements. Furthermore, WISeR adapts to changes that occur during the Web API selection and mashup development, by revising the dimensional attributes in order to conform to developers' preferences and constraints. We also present an experimental evaluation of the framework.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rocha:2017:LPL, author = "Andr{\'e} Rocha and C{\'a}ssio Prazeres", title = "{LDoW--PaN}: Linked Data on the {Web}-Presentation and Navigation", journal = j-TWEB, volume = "11", number = "4", pages = "20:1--20:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983643", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This work aimed to propose LDoW-PaN, a Linked Data presentation and navigation model focused on the average user. The LDoW-PaN model is an extension of the Dexter Hypertext Reference Model. 
Through the LDoW-PaN model, ordinary people-who have no experience with technologies that involve the Linked Data environment-can interact with the Web of Data (RDF) more closely related to how they interact with the Web of Documents (HTML). To evaluate the proposal, some tools were developed, including the following: (i) a Web Service, which implements the lower-level layers of the LDoW-PaN model; (ii) a client-side script library, which implements the presentation and navigation layer; and (iii) a browser extension, which uses these tools to provide Linked Data presentation and navigation to users browsing the Web. The browser extension was developed using user interface approaches that are well known, well accepted, and evaluated by the Web research community, such as faceted navigation and presentation through tooltips. Therefore, the prototype evaluation included: usability evaluation through two classical techniques; computational complexity measures; and an analysis of the performance of the operations provided by the proposed model.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2017:CUB, author = "Gang Wang and Xinyi Zhang and Shiliang Tang and Christo Wilson and Haitao Zheng and Ben Y. Zhao", title = "Clickstream User Behavior Models", journal = j-TWEB, volume = "11", number = "4", pages = "21:1--21:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068332", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The next generation of Internet services is driven by users and user-generated content. The complex nature of user behavior makes it highly challenging to manage and secure online services. 
On one hand, service providers cannot effectively prevent attackers from creating large numbers of fake identities to disseminate unwanted content (e.g., spam). On the other hand, abusive behavior from real users also poses significant threats (e.g., cyberbullying). In this article, we propose clickstream models to characterize user behavior in large online services. By analyzing clickstream traces (i.e., sequences of click events from users), we seek to achieve two goals: (1) detection: to capture distinct user groups for the detection of malicious accounts, and (2) understanding: to extract semantic information from user groups to understand the captured behavior. To achieve these goals, we build two related systems. The first one is a semisupervised system to detect malicious user accounts (Sybils). The core idea is to build a clickstream similarity graph where each node is a user and an edge captures the similarity of two users' clickstreams. Based on this graph, we propose a coloring scheme to identify groups of malicious accounts without relying on a large labeled dataset. We validate the system using ground-truth clickstream traces of 16,000 real and Sybil users from Renren, a large Chinese social network. The second system is an unsupervised system that aims to capture and understand the fine-grained user behavior. Instead of binary classification (malicious or benign), this model identifies the natural groups of user behavior and automatically extracts features to interpret their semantic meanings. Applying this system to Renren and another online social network, Whisper (100K users), we help service providers identify unexpected user behaviors and even predict users' future actions. 
Both systems received positive feedback from our industrial collaborators including Renren, LinkedIn, and Whisper after testing on their internal clickstream data.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hogan:2017:CFI, author = "Aidan Hogan", title = "Canonical Forms for Isomorphic and Equivalent {RDF} Graphs: Algorithms for Leaning and Labelling Blank Nodes", journal = j-TWEB, volume = "11", number = "4", pages = "22:1--22:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068333", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Existential blank nodes greatly complicate a number of fundamental operations on Resource Description Framework (RDF) graphs. In particular, the problems of determining if two RDF graphs have the same structure modulo blank node labels (i.e., if they are isomorphic), or determining if two RDF graphs have the same meaning under simple semantics (i.e., if they are simple-equivalent), have no known polynomial-time algorithms. In this article, we propose methods that can produce two canonical forms of an RDF graph. The first canonical form preserves isomorphism such that any two isomorphic RDF graphs will produce the same canonical form; this iso-canonical form is produced by modifying the well-known canonical labelling algorithm Nauty for application to RDF graphs. The second canonical form additionally preserves simple-equivalence such that any two simple-equivalent RDF graphs will produce the same canonical form; this equi-canonical form is produced by, in a preliminary step, leaning the RDF graph, and then computing the iso-canonical form. 
These algorithms have a number of practical applications, such as for identifying isomorphic or equivalent RDF graphs in a large collection without requiring pairwise comparison, for computing checksums or signing RDF graphs, for applying consistent Skolemisation schemes where blank nodes are mapped in a canonical manner to Internationalised Resource Identifiers (IRIs), and so forth. Likewise a variety of algorithms can be simplified by presupposing RDF graphs in one of these canonical forms. Both algorithms require exponential steps in the worst case; in our evaluation we demonstrate that there indeed exist difficult synthetic cases, but we also provide results over 9.9 million RDF graphs that suggest such cases occur infrequently in the real world, and that both canonical forms can be efficiently computed in all but a handful of such cases.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Koutrika:2017:SWP, author = "Georgia Koutrika and Qian Lin", title = "A Study of {Web} Print: What People Print in the Digital Era", journal = j-TWEB, volume = "11", number = "4", pages = "23:1--23:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068331", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article analyzes a proprietary log of printed web pages and aims at answering questions regarding the content people print (what), the reasons they print (why), as well as attributes of their print profile (who). We present a classification of pages printed based on their print intent and we describe our methodology for processing the print dataset used in this study. 
In our analysis, we study the web sites, topics, and print intent of the pages printed along the following aspects: popularity, trends, activity, user diversity, and consistency. We present several findings that reveal interesting insights into printing. We analyze our findings and discuss their impact and directions for future work.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bernaschi:2017:EAT, author = "Massimo Bernaschi and Alessandro Celestini and Stefano Guarino and Flavio Lombardi", title = "Exploring and Analyzing the {Tor} Hidden Services Graph", journal = j-TWEB, volume = "11", number = "4", pages = "24:1--24:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3008662", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The exploration and analysis of Web graphs has flourished in the recent past, producing a large number of relevant and interesting research results. However, the unique characteristics of the Tor network limit the applicability of standard techniques and demand for specific algorithms to explore and analyze it. The attention of the research community has focused on assessing the security of the Tor infrastructure (i.e., its ability to actually provide the intended level of anonymity) and on discussing what Tor is currently being used for. Since there are no foolproof techniques for automatically discovering Tor hidden services, little or no information is available about the topology of the Tor Web graph. Even less is known on the relationship between content similarity and topological structure. The present article aims at addressing such lack of information. 
Among its contributions: a study on automatic Tor Web exploration/data collection approaches; the adoption of novel representative metrics for evaluating Tor data; a novel in-depth analysis of the hidden services graph; a rich correlation analysis of hidden services' semantics and topology. Finally, a broad interesting set of novel insights/considerations over the Tor Web organization and content are provided.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Xu:2017:COF, author = "Chang Xu and Jie Zhang", title = "Collusive Opinion Fraud Detection in Online Reviews: a Probabilistic Modeling Approach", journal = j-TWEB, volume = "11", number = "4", pages = "25:1--25:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3098859", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We address the collusive opinion fraud problem in online review portals, where groups of people work together to deliver deceptive reviews for manipulating the reputations of targeted items. Such collusive fraud is considered much harder to defend against, since the participants (or colluders) can evade detection by shaping their behaviors collectively so as not to appear suspicious. To alleviate this problem, countermeasures have been proposed that leverage the collective behaviors of colluders. The motivation stems from the observation that colluders typically act in a very synchronized way, as they are instructed by the same campaigns with common items to target and schedules to follow. However, the collective behaviors examined in existing solutions focus mostly on the external appearance of fraud campaigns, such as the campaign size and the size of the targeted item set. 
These signals may become ineffective once colluders have changed their behaviors collectively. Moreover, the detection algorithms used in existing approaches are designed to only make collusion inference on the input data; predictive models that can be deployed for detecting emerging fraud cannot be learned from the data. In this article, to complement existing studies on collusive opinion fraud characterization and detection, we explore more subtle behavioral trails in collusive fraud practice. In particular, a suite of homogeneity-based measures are proposed to capture the interrelationships among colluders within campaigns. Moreover, a novel statistical model is proposed to further characterize, recognize, and predict collusive fraud in online reviews. The proposed model is fully unsupervised and highly flexible to incorporate effective measures available for better modeling and prediction. Through experiments on two real-world datasets, we show that our method outperforms the state of the art in both characterization and detection abilities.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chattopadhyay:2017:FSM, author = "Soumi Chattopadhyay and Ansuman Banerjee and Nilanjan Banerjee", title = "A Fast and Scalable Mechanism for {Web} Service Composition", journal = j-TWEB, volume = "11", number = "4", pages = "26:1--26:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3098884", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent times, automated business processes and web services have become ubiquitous in diverse application spaces. 
Efficient composition of web services in real time while providing necessary Quality of Service (QoS) guarantees is a computationally complex problem and several heuristic based approaches have been proposed to compose the services optimally. In this article, we present the design of a scalable QoS-aware service composition mechanism that balances the computational complexity of service composition with the QoS guarantees of the composed service and achieves scalability. Our design guarantees a single QoS parameter using an intelligent search and pruning mechanism in the composed service space. We also show that our methodology yields near optimal solutions on real benchmarks. We then enhance our proposed mechanism to guarantee multiple QoS parameters using aggregation techniques. Finally, we explore search time versus solution quality tradeoff using parameterized search algorithms that produce better-quality solutions at the cost of delay. We present experimental results to show the efficiency of our proposed mechanism.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{He:2018:EET, author = "Ming He and Yong Ge and Enhong Chen and Qi Liu and Xuesong Wang", title = "Exploring the Emerging Type of Comment for Online Videos: {DanMu}", journal = j-TWEB, volume = "12", number = "1", pages = "1:1--1:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3098885", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "DanMu, an emerging type of user-generated comment, has become increasingly popular in recent years. Many online video platforms such as Tudou.com have provided the DanMu function. 
Unlike traditional online reviews such as reviews at Youtube.com that are outside the videos, DanMu is a scrolling marquee comment, which is overlaid directly on top of the video and synchronized to a specific playback time. Such comments are displayed as streams of moving subtitles overlaid on the video screen. Viewers could easily write DanMu s while watching videos, and the written DanMu s will be immediately overlaid onto the video and displayed to writers themselves and other viewers as well. Such DanMu systems have greatly enabled users to communicate with each other in a much more direct way, creating a real-time sharing experience. Although there are several unique features of DanMu and it has had a great impact on online video systems, to the best of our knowledge, there is no work that has provided a comprehensive study on DanMu. In this article, as a pilot study, we analyze the unique characteristics of DanMu from various perspectives. Specifically, we first illustrate some unique distributions of DanMu s by comparing with traditional reviews (TReviews) that we collected from a real DanMu -enabled online video system. Second, we discover two interesting patterns in DanMu data: a herding effect and multiple-burst phenomena that are significantly different from those in TReviews and reveal important insights about the growth of DanMu s on a video. Towards exploring antecedents of both the herding effect and multiple-burst phenomena, we propose to further detect leading DanMu s within bursts, because those leading DanMu s make the most contribution to both patterns. A framework is proposed to detect leading DanMu s that effectively combines multiple factors contributing to leading DanMu s. 
Based on the identified characteristics of DanMu, finally we propose to predict the distribution of future DanMu s (i.e., the growth of DanMu s), which is important for many DanMu -enabled online video systems, for example, the predicted DanMu distribution could be an indicator of video popularity. This prediction task includes two aspects: One is to predict which videos future DanMu s will be posted for, and the other one is to predict which segments of a video future DanMu s will be posted on. We develop two sophisticated models to solve both problems. Finally, intensive experiments are conducted with a real-world dataset to validate all methods developed in this article.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Minervini:2018:AKP, author = "Pasquale Minervini and Volker Tresp and Claudia D'amato and Nicola Fanizzi", title = "Adaptive Knowledge Propagation in {Web} Ontologies", journal = j-TWEB, volume = "12", number = "1", pages = "2:1--2:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105961", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We focus on the problem of predicting missing assertions in Web ontologies. We start from the assumption that individual resources that are similar in some aspects are more likely to be linked by specific relations: this phenomenon is also referred to as homophily and emerges in a variety of relational domains. In this article, we propose a method for (1) identifying which relations in the ontology are more likely to link similar individuals and (2) efficiently propagating knowledge across chains of similar individuals. 
By enforcing sparsity in the model parameters, the proposed method is able to select only the most relevant relations for a given prediction task. Our experimental evaluation demonstrates the effectiveness of the proposed method in comparison to state-of-the-art methods from the literature.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2018:RCW, author = "Yining Liu and Yong Liu and Yanming Shen and Keqiu Li", title = "Recommendation in a Changing World: Exploiting Temporal Dynamics in Ratings and Reviews", journal = j-TWEB, volume = "12", number = "1", pages = "3:1--3:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3108238", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Users' preferences, and consequently their ratings and reviews to items, change over time. Likewise, characteristics of items are also time-varying. By dividing data into time periods, temporal Recommender Systems (RSs) improve recommendation accuracy by exploring the temporal dynamics in user rating data. However, temporal RSs have to cope with rating sparsity in each time period. Meanwhile, reviews generated by users contain rich information about their preferences, which can be exploited to address rating sparsity and further improve the performance of temporal RSs. In this article, we develop a temporal rating model with topics that jointly mines the temporal dynamics of both user-item ratings and reviews. Studying temporal drifts in reviews helps us understand item rating evolutions and user interest changes over time. Our model also automatically splits the review text in each time period into interim words and intrinsic words. 
By linking interim words and intrinsic words to short-term and long-term item features, respectively, we jointly mine the temporal changes in user and item latent features together with the associated review text in a single learning stage. Through experiments on 28 real-world datasets collected from Amazon, we show that the rating prediction accuracy of our model significantly outperforms the existing state-of-the-art RS models. And our model can automatically identify representative interim words in each time period as well as intrinsic words across all time periods. This can be very useful in understanding the time evolution of users' preferences and items' characteristics.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tu:2018:ARP, author = "Wenting Tu and David W. Cheung and Nikos Mamoulis and Min Yang and Ziyu Lu", title = "Activity Recommendation with Partners", journal = j-TWEB, volume = "12", number = "1", pages = "4:1--4:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3121407", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recommending social activities, such as watching movies or having dinner, is a common function found in social networks or e-commerce sites. Besides certain websites which manage activity-related locations (e.g., foursquare.com), many items on product sale platforms (e.g., groupon.com) can naturally be mapped to social activities. For example, movie tickets can be thought of as activity items, which can be mapped as a social activity of ``watch a movie.'' Traditional recommender systems estimate the degree of interest for a target user on candidate items (or activities), and accordingly, recommend the top-$k$ activity items to
the user. However, these systems ignore an important
social characteristic of recommended activities: people
usually tend to participate in those activities with
the effectiveness of recommendation in two directions.
First, we study the problem of activity-partner
recommendation; i.e., for each recommended activity
item, find a suitable partner for the user. This (i)
saves the user's time for finding activity partners,
(ii) increases the likelihood that the activity item
will be selected by the user, and (iii) improves the
effectiveness of recommender systems to users overall
and enkindles their social enthusiasm. Our partner
recommender is built upon the users' historical
attendance preferences, their social context, and
geographic information. Moreover, we explore how to
leverage the partner recommendation to help improve the
effectiveness of recommending activities to users.
Assuming that users tend to select the activities for
which they can find suitable partners, we propose a
partner-aware activity recommendation model, which
integrates this hypothesis into conventional
recommendation approaches. Finally, the recommended
items not only match users' interests, but also have
high chances to be selected by the users, because the
users can find suitable partners to attend the
corresponding activities together. We conduct
experiments on real data to evaluate the effectiveness
of activity-partner recommendation and partner-aware
activity recommendation. The results verify that (i)
suggesting partners greatly improves the likelihood
that a recommended activity item is to be selected by
the target user and (ii) considering the existence of
suitable partners in the ranking of recommended items
improves the accuracy of recommendation
significantly.",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Dutta:2018:CRM,
author =       "Kaushik Dutta and Debra Vandermeer",
title =        "Caching to Reduce Mobile App Energy Consumption",
journal =      j-TWEB,
volume =       "12",
number =       "1",
pages =        "5:1--5:??",
month =        feb,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3125778",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:00 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Mobile applications consume device energy for their
operations, and the fast rate of battery depletion on
mobile devices poses a major usability hurdle. After
the display, data communication is the second-biggest
consumer of mobile device energy. At the same time,
software applications that run on mobile devices
represent a fast-growing product segment. Typically,
these applications serve as front-end display
mechanisms, which fetch data from remote servers and
display the information to the user in an appropriate
format-incurring significant data communication
overheads in the process. In this work, we propose
methods to reduce energy overheads in mobile devices
due to data communication by leveraging data caching
technology. A review of existing caching mechanisms
revealed that they are primarily designed for
optimizing response time performance and cannot be
easily ported to mobile devices for energy savings.
client-server and mobile communications infrastructures
make the use of existing caching technologies
propose a set of two new caching approaches
specifically designed with the constraints of mobile
devices in mind: (a) a response caching approach and
(b) an object caching approach. Our experiments show
that, even for a small cache size of 250MB, object
caching can reduce energy consumption on average by
45\% compared to the no-cache case, and response
caching can reduce energy consumption by 20\% compared
to the no-cache case. The benefits increase with larger
cache sizes. These results demonstrate the efficacy of
our proposed method and raise the possibility of
significantly extending mobile device battery life.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Manta-Caro:2018:MSW,
author =       "Cristyan Manta-Caro and Juan M. Fern{\'a}ndez-Luna",
title =        "Modeling and Simulating the {Web of Things} from an
Information Retrieval Perspective",
journal =      j-TWEB,
volume =       "12",
number =       "1",
pages =        "6:1--6:??",
month =        feb,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3132732",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:00 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Internet and Web technologies have changed our lives
in ways we are not yet fully aware of. In the near
future, Internet will interconnect more than 50 billion
things in the real world, nodes will sense billions of
features and properties of interest, and things will be
represented by web-based, bi-directional services with
highly dynamic content and real-time data. This is the
new era of the Internet and the Web of Things. Since
the emergence of such paradigms implies the evolution
and integration of the systems with which they
interact, it is essential to develop abstract models
for representing and simulating the Web of Things in
describes a Web of Things model based on a structured
XML representation. We also present a simulator whose
ultimate goal is to encapsulate the expected dynamics
of the Web of Things for the future development of
information retrieval (IR) systems. The simulator
generates a real-time collection of XML documents
containing spatio-temporal contexts and textual and
sensed information of highly dynamic dimensions. The
simulator is characterized by its flexibility and
versatility for representing real-world scenarios and
offers a unique perspective for information retrieval.
terms of its performance variables for computing
resource consumption and present our experimentation
with the simulator on three real scenarios by
considering the generation variables for the IR
document collection.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Davison:2018:LTR,
author =       "Brian D. Davison",
title =        "List of 2016 {TWEB} Reviewers",
journal =      j-TWEB,
volume =       "12",
number =       "1",
pages =        "7:1--7:??",
month =        feb,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3180440",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:00 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Davison:2018:E,
author =       "Brian D. Davison",
title =        "Editorial",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "8:1--8:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3232925",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "8e",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Jia:2018:KGE,
author =       "Yantao Jia and Yuanzhuo Wang and Xiaolong Jin and
Hailun Lin and Xueqi Cheng",
title =        "Knowledge Graph Embedding: a Locally and Temporally
Adaptive Translation-Based Approach",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "8:1--8:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3132733",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "A knowledge graph is a graph with entities of
different types as nodes and various relations among
them as edges. The construction of knowledge graphs in
the past decades facilitates many applications, such as
link prediction, web search analysis, question
answering, and so on. Knowledge graph embedding aims to
represent entities and relations in a large-scale
knowledge graph as elements in a continuous vector
space. Existing methods, for example, TransE, TransH,
and TransR, learn the embedding representation by
defining a global margin-based loss function over the
data. However, the loss function is determined during
experiments whose parameters are examined among a
closed set of candidates. Moreover, embeddings over two
knowledge graphs with different entities and relations
share the same set of candidates, ignoring the locality
of both graphs. This leads to the limited performance
of embedding related applications. In this article, a
locally adaptive translation method for knowledge graph
embedding, called TransA, is proposed to find the loss
function by adaptively determining its margin over
different knowledge graphs. Then the convergence of
TransA is verified from the aspect of its uniform
stability. To make the embedding methods up-to-date
when new vertices and edges are added into the
knowledge graph, the incremental algorithm for TransA,
called iTransA, is proposed by adaptively adjusting the
optimal margin over time. Experiments on four benchmark
data sets demonstrate the superiority of the proposed
method, as compared to the state-of-the-art ones.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Park:2018:WSD,
author =       "Souneil Park and Aleksandar Matic and Kamini Garg and
Nuria Oliver",
title =        "When Simpler Data Does Not Imply Less Information: a
Study of User Profiling Scenarios With Constrained View
of Mobile {HTTP(S)} Traffic",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "9:1--9:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3143402",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The exponential growth in smartphone adoption is
contributing to the availability of vast amounts of
human behavioral data. This data enables the
development of increasingly accurate data-driven user
models that facilitate the delivery of personalized
services that are often free in exchange for the use of
its customers' data. Although such usage conventions
have raised many privacy concerns, the increasing value
of personal data is motivating diverse entities to
aggressively collect and exploit the data. In this
article, we unfold profiling scenarios around mobile
HTTP(S) traffic, focusing on those that have limited
but meaningful segments of the data. The capability of
the scenarios to profile personal information is
examined with real user data, collected in the wild
from 61 mobile phone users for a minimum of 30 days.
Our study attempts to model heterogeneous user traits
and interests, including personality, boredom
proneness, demographics, and shopping interests. Based
on our modeling results, we discuss various
implications to personalization, privacy, and personal
data rights.",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Calzavara:2018:SBA,
author =       "Stefano Calzavara and Alvise Rabitti and Michele
Bugliesi",
title =        "Semantics-Based Analysis of Content Security Policy
Deployment",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "10:1--10:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3149408",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Content Security Policy (CSP) is a recent W3C standard
introduced to prevent and mitigate the impact of
content injection vulnerabilities on websites. In this
article, we introduce a formal semantics for the latest
stable version of the standard, CSP Level 2. We then
perform a systematic, large-scale analysis of the
effectiveness of the current CSP deployment, using the
formal semantics to substantiate our methodology and to
assess the impact of the detected issues. We focus on
four key aspects that affect the effectiveness of CSP:
browser support, website adoption, correct
configuration, and constant maintenance. Our analysis
shows that browser support for CSP is largely
satisfactory, with the exception of a few notable
issues. However, there are several shortcomings
relative to the other three aspects. CSP appears to
have a rather limited deployment as yet and, more
crucially, existing policies exhibit a number of
weaknesses and misconfiguration errors. Moreover,
content security policies are not regularly updated to
ban insecure practices and remove unintended security
violations. We argue that many of these problems can be
fixed by better exploiting the monitoring facilities of
CSP, while other issues deserve additional research,
being more rooted into the CSP design.",
acknowledgement = ack-nhfb,
articleno =    "10",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cacheda:2018:CPU,
author =       "Fidel Cacheda and Roi Blanco and Nicola Barbieri",
title =        "Characterizing and Predicting Users' Behavior on Local
Search Queries",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "11:1--11:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3157059",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The use of queries to find products and services that
are located nearby is increasing rapidly due mainly to
the ubiquity of internet access and location services
provided by smartphone devices. Local search engines
help users by matching queries with a predefined
geographical connotation (``local queries'') against a
database of local business listings. Local search
differs from traditional Web search because, to
correctly capture users' click behavior, the estimation
of relevance between query and candidate results must
be integrated with geographical signals, such as
distance. The intuition is that users prefer businesses
that are physically closer to them or in a convenient
area (e.g., close to their home). However, this notion
of closeness depends upon other factors, like the
business category, the quality of the service provided,
the density of businesses in the area of interest, the
hour of the day, or even the day of the week. In this
work, we perform an extensive analysis of online users'
interactions with a local search engine, investigating
their intent, temporal patterns, and highlighting
factors, such as business reputation. Furthermore, we
investigate the problem of estimating the click-through
rate on local search ( LCTR ) by exploiting the
combination of standard retrieval methods with a rich
collection of geo-, user-, and business-dependent
features. We validate our approach on a large log
collected from a real-world local search service. Our
evaluation shows that the non-linear combination of
business and user information, geo-local and textual
relevance features leads to significant improvements
over existing alternative approaches based on a
combination of relevance, distance, and business
reputation [1].",
acknowledgement = ack-nhfb,
articleno =    "11",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Boldi:2018:BMC,
author =       "Paolo Boldi and Andrea Marino and Massimo Santini and
Sebastiano Vigna",
title =        "{BUbiNG}: Massive Crawling for the Masses",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "12:1--12:26",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3160017",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/java2010.bib;
http://www.math.utah.edu/pub/tex/bib/pagerank.bib;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
URL =          "https://dl.acm.org/citation.cfm?doid=3176641.3160017",
abstract =     "Although web crawlers have been around for twenty
years by now, there is virtually no freely available,
open-source crawling software that guarantees high
throughput, overcomes the limits of single-machine
systems, and, at the same time, scales linearly with
the amount of resources available. This paper aims at
filling this gap, through the description of BUbiNG,
our next-generation web crawler built upon the authors'
experience with UbiCrawler [9] and on the last ten
years of research on the topic. BUbiNG is an
open-source Java fully distributed crawler; a single
BUbiNG agent, using sizeable hardware, can crawl
several thousand pages per second respecting strict
politeness constraints, both host- and IP-based. Unlike
existing open-source distributed crawlers that rely on
batch techniques (like MapReduce), BUbiNG job
distribution is based on modern high-speed protocols to
achieve very high throughput.",
acknowledgement = ack-nhfb,
articleno =    "12",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "BUbiNG; centrality measures; distributed systems;
Java; PageRank; UbiCrawler; Web crawling",
}

@Article{Gaeta:2018:MID,
author =       "Rossano Gaeta",
title =        "A Model of Information Diffusion in Interconnected
Online Social Networks",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "13:1--13:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3160000",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Online social networks (OSN) have today reached a
remarkable capillary diffusion. There are numerous
examples of very large platforms people use to
communicate and maintain relationships. People also
subscribe to several OSNs, e.g., people create accounts
to online social internetworking (OSI) scenarios where
users who subscribe to multiple OSNs are termed as
bridges. Unfortunately, several important features make
the study of information propagation in an OSI scenario
a difficult task, e.g., correlations in both the
structural characteristics of OSNs and the bridge
interconnections among them, heterogeneity and size of
OSNs, activity factors, cross-posting propensity, and
graph-based model that is amenable to efficient
numerical solution to analyze the phenomenon of
information propagation in an OSI scenario; in the
model development, we take into account heterogeneity
and correlations introduced by both topological
(correlations among nodes degrees and among bridge
distributions) and user-related factors (activity
index, cross-posting propensity). We first validate the
model predictions against simulations on snapshots of
interconnected OSNs in a reference scenario.
Subsequently, we exploit the model to show the impact
on the information propagation of several
characteristics of the reference scenario, i.e., size
and complexity of the OSI scenario, degree distribution
and overall number of bridges, growth and decline of
OSNs in time, and time-varying cross-posting users
propensity.",
acknowledgement = ack-nhfb,
articleno =    "13",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Davison:2018:TR,
author =       "Brian D. Davison",
title =        "2017 {TWEB} Reviewers",
journal =      j-TWEB,
volume =       "12",
number =       "2",
pages =        "14:1--14:??",
month =        jun,
year =         "2018",
CODEN =        "????",
DOI =          "https://doi.org/10.1145/3209033",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Jun 28 14:10:01 MDT 2018",
bibsource =    "http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}