%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.21",
%%%     date            = "06 November 2014",
%%%     time            = "16:09:14 MDT",
%%%     filename        = "tweb.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "29293 7135 40087 373977",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on the Web (TWEB);
%%%                        bibliography; TWEB",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on the Web (TWEB) (CODEN
%%%                        ????, ISSN 1559-1131), covering all journal
%%%                        issues from 2007 -- date.
%%%
%%%                        At version 1.21, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2007 (  14)    2010 (  17)    2013 (  30)
%%%                             2008 (  22)    2011 (  21)    2014 (  19)
%%%                             2009 (  14)    2012 (  18)
%%%
%%%                             Article:        155
%%%
%%%                             Total entries:  155
%%%
%%%                        The journal Web page can be found at:
%%%
%%%                            http://www.acm.org/pubs/tweb.html
%%%
%%%
%%%                            http://www.acm.org/tweb/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J1062
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume''.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================

@Preamble{"\input bibnames.sty" #
          "\def \TM {${}^{\sc TM}$}"}

%%% ====================================================================
%%% Acknowledgement abbreviations:

@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@String{j-TWEB                  = "ACM Transactions on the Web (TWEB)"}

%%% ====================================================================
%%% Bibliography entries:

@Article{Ashman:2007:I,
  author =       "Helen Ashman and Arun Iyengar",
  title =        "Introduction",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232723",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Urgaonkar:2007:AMM,
  author =       "Bhuvan Urgaonkar and Giovanni Pacifici and Prashant
                 Shenoy and Mike Spreitzer and Asser Tantawi",
  title =        "Analytic modeling of multitier {Internet}
                 applications",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232724",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Since many Internet applications employ a multitier
                 architecture, in this article, we focus on the problem
                 of analytically modeling the behavior of such
                 applications. We present a model based on a network of
                 queues where the queues represent different tiers of
                 the application. Our model is sufficiently general to
                 capture (i) the behavior of tiers with significantly
                 different performance characteristics and (ii)
                 application idiosyncrasies such as session-based
                 workloads, tier replication, load imbalances across
                 replicas, and caching at intermediate tiers. We
                 validate our model using real multitier applications
                 running on a Linux server cluster. Our experiments
                 indicate that our model faithfully captures the
                 performance of these applications for a number of
                 workloads and configurations. Furthermore, our model
                 successfully handles a comprehensive range of resource
                 utilization---from 0 to near saturation for the
                 CPU---for two separate tiers. For a variety of
                 scenarios, including those with caching at one of the
                 application tiers, the average response times predicted
                 by our model were within the 95\% confidence intervals
                 of the observed average response times. Our experiments
                 also demonstrate the utility of the model for dynamic
                 capacity provisioning, performance prediction,
                 bottleneck identification, and session policing. In one
                 scenario, where the request arrival rate increased from
                 less than 1500 to nearly 4200 requests/minute, a
                 dynamic provisioning technique employing our model was
                 able to maintain response time targets by increasing
                 the capacity of two of the tiers by factors of 2 and
                 3.5, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "analytical model; dynamic provisioning; hosting
                 platform; Internet service; mean-value analysis;
                 performance prediction; policing; queuing theory;
                 session; tier",
}

@Article{Jansen:2007:CES,
  author =       "Bernard J. Jansen",
  title =        "The comparative effectiveness of sponsored and
                 nonsponsored links for {Web} e-commerce queries",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232725",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The predominant business model for Web search engines
                 is sponsored search, which generates billions in yearly
                 consumers with relevant choices for products and
                 services? We address this and related issues by
                 investigating the relevance of sponsored and
                 search engines. The results show that average relevance
                 practically the same, although the relevance ratings
                 108 ecommerce queries and 8,256 retrieved links for
                 these queries from three major Web search engines:
                 measures, we qualitatively analyzed the e-commerce
                 queries, deriving five categorizations of underlying
                 information needs. Product-specific queries are the
                 most prevalent (48\%). Title (62\%) and summary (33\%)
                 with URL a distant third (2\%). To gauge the
                 effectiveness of sponsored search campaigns, we
                 It appears that links from organizations with large
                 sponsored search campaigns are more relevant than the
                 Web search engines and sponsored search as a long-term
                 business model and as a mechanism for finding relevant
                 information for searchers.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "results; sponsored search; Web search engines; Web
                 searching",
}

@Article{Church:2007:MIA,
  author =       "Karen Church and Barry Smyth and Paul Cotter and Keith
                 Bradley",
  title =        "Mobile information access: {A} study of emerging
                 search behavior on the mobile {Internet}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232726",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "It is likely that mobile phones will soon come to
                 rival more traditional devices as the primary platform
                 for information access. Consequently, it is important
                 to understand the emerging information access behavior
                 of mobile Internet (MI) users especially in relation to
                 their use of mobile handsets for information browsing
                 the results of a recent analysis of the MI habits of
                 more than 600,000 European MI users, with a particular
                 emphasis on the emerging interest in mobile search. We
                 consider a range of factors including whether there are
                 key differences between browsing and search behavior on
                 the MI compared to the Web. We highlight how browsing
                 continues to dominate mobile information access, but go
                 on to show how search is becoming an increasingly
                 popular information access alternative especially in
                 relation to certain types of mobile handsets and
                 information needs. Moreover, we show that sessions
                 involving search tend to be longer and more data-rich
                 than those that do not involve search. We also look at
                 the type of queries used during mobile search and the
                 way that these queries tend to be modified during the
                 course of a mobile search session. Finally we examine
                 the overlap among mobile search queries and the
                 different topics mobile users are interested in.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "log analysis; Mobile browsing; mobile Internet; mobile
                 search",
}

@Article{Leskovec:2007:DVM,
  author =       "Jure Leskovec and Lada A. Adamic and Bernardo A.
                 Huberman",
  title =        "The dynamics of viral marketing",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232727",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We present an analysis of a person-to-person
                 recommendation network, consisting of 4 million people
                 who made 16 million recommendations on half a million
                 products. We observe the propagation of recommendations
                 and the cascade sizes, which we explain by a simple
                 stochastic model. We analyze how user behavior varies
                 within user communities defined by a recommendation
                 network. Product purchases follow a ``long tail'' where
                 a significant share of purchases belongs to rarely sold
                 items. We establish how the recommendation network
                 grows over time and how effective it is from the
                 viewpoint of the sender and receiver of the
                 recommendations. While on average recommendations are
                 not very effective at inducing purchases and do not
                 spread very far, we present a model that successfully
                 identifies communities, product, and pricing categories
                 for which viral marketing seems to be very effective.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "e-commerce; long tail; network analysis; recommender
                 systems; Viral marketing; word-of-mouth",
}

@Article{Yu:2007:EAW,
  author =       "Tao Yu and Yue Zhang and Kwei-Jay Lin",
  title =        "Efficient algorithms for {Web} services selection with
                 end-to-end {QoS} constraints",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232728",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Service-Oriented Architecture (SOA) provides a
                 flexible framework for service composition. Using
                 standard-based protocols (such as SOAP and WSDL),
                 composite services can be constructed by integrating
                 atomic services developed independently. Algorithms are
                 needed to select service components with various QoS
                 levels according to some application-dependent
                 performance requirements. We design a broker-based
                 architecture to facilitate the selection of QoS-based
                 services. The objective of service selection is to
                 maximize an application-specific utility function under
                 the end-to-end QoS constraints. The problem is modeled
                 in two ways: the combinatorial model and the graph
                 model. The combinatorial model defines the problem as a
                 multidimension multichoice 0-1 knapsack problem (MMKP).
                 The graph model defines the problem as a
                 multiconstraint optimal path (MCOP) problem. Efficient
                 heuristic algorithms for service processes of different
                 and their performances are studied by simulations. We
                 also compare the pros and cons between the two
                 models.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "End-to-end QoS; service composition; service oriented
                 architecture (SOA); service selection; Web services",
}

@Article{Dubinko:2007:VTT,
  author =       "Micah Dubinko and Ravi Kumar and Joseph Magnani and
                 Jasmine Novak and Prabhakar Raghavan and Andrew
                 Tomkins",
  title =        "Visualizing tags over time",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255439",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We consider the problem of visualizing the evolution
                 of tags within the Flickr (flickr.com) online image
                 sharing community. Any user of the Flickr service may
                 append a tag to any photo in the system. Over the past
                 year, users have on average added over a million tags
                 each week. Understanding the evolution of these tags
                 over time is therefore a challenging task. We present a
                 new approach based on a characterization of the most
                 interesting tags associated with a sliding interval of
                 time. An animation provided via Flash in a Web browser
                 allows the user to observe and interact with the
                 interesting tags as they evolve over time.\par

                 New algorithms and data structures are required to
                 support the efficient generation of this visualization.
                 We combine a novel solution to an interval covering
                 problem with extensions to previous work on score
                 aggregation in order to create an efficient backend
                 system capable of producing visualizations at arbitrary
                 scales on this large dataset in real time.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Flickr; interval covering; tags; temporal evolution;
                 visualization",
}

@Article{Mohan:2007:SPC,
  author =       "Bharath Kumar Mohan and Benjamin J. Keller and Naren
                 Ramakrishnan",
  title =        "Scouts, promoters, and connectors: {The} roles of
                 ratings in nearest-neighbor collaborative filtering",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255440",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Recommender systems aggregate individual user ratings
                 into predictions of products or services that might
                 interest visitors. The quality of this aggregation
                 process crucially affects the user experience and hence
                 the effectiveness of recommenders in e-commerce. We
                 present a characterization of nearest-neighbor
                 collaborative filtering that allows us to disaggregate
                 global recommender performance measures into
                 contributions made by each individual rating. In
                 particular, we formulate three roles--- {\em scouts},
                 {\em promoters}, and {\em connectors\/} ---that capture
                 how users receive recommendations, how items get
                 recommended, and how ratings of these two types are
                 themselves connected, respectively. These roles find
                 direct uses in improving recommendations for users, in
                 better targeting of items and, most importantly, in
                 helping monitor the health of the system as a whole.
                 For instance, they can be used to track the evolution
                 of neighborhoods, to identify rating subspaces that do
                 not contribute (or contribute negatively) to system
                 performance, to enumerate users who are in danger of
                 leaving, and to assess the susceptibility of the system
                 to attacks such as shilling. We argue that the three
                 rating roles presented here provide broad primitives to
                 manage a recommender system and its community.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "collaborative filtering; connectors; neighborhoods;
                 promoters; Recommender systems; scouts; user-based and
                 item-based algorithms",
}

@Article{Rogers:2007:EPB,
  author =       "Alex Rogers and Esther David and Nicholas R. Jennings
                 and Jeremy Schiff",
  title =        "The effects of proxy bidding and minimum bid
                 increments within {eBay} auctions",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255441",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We present a mathematical model of the eBay auction
                 protocol and perform a detailed analysis of the effects
                 that the eBay proxy bidding system and the minimum bid
                 increment have on the auction properties. We first
                 consider the revenue of the auction, and we show
                 analytically that when two bidders with independent
                 private valuations use the eBay proxy bidding system
                 there exists an optimal value for the minimum bid
                 increment at which the auctioneer's revenue is
                 maximized. We then consider the sequential way in which
                 bids are placed within the auction, and we show
                 analytically that independent of assumptions regarding
                 the bidders' valuation distribution or bidding strategy
                 the number of visible bids placed is related to the
                 logarithm of the number of potential bidders. Thus, in
                 many cases, it is only a minority of the potential
                 bidders that are able to submit bids and are visible in
                 the auction bid history (despite the fact that the
                 other hidden bidders are still effectively competing
                 for the item). Furthermore, we show through simulation
                 that the minimum bid increment also introduces an
                 inefficiency to the auction, whereby a bidder who
                 enters the auction late may find that its valuation is
                 insufficient to allow them to advance the current bid
                 by the minimum bid increment despite them actually
                 having the highest valuation for the item. Finally, we
                 use these results to consider appropriate strategies
                 for bidders within real world eBay auctions. We show
                 that while last-minute bidding (sniping) is an
                 effective strategy against bidders engaging in
                 incremental bidding (and against those with common
                 values), in general, delaying bidding is
                 disadvantageous even if delayed bids are sure to be
                 received before the auction closes. Thus, when several
                 bidders submit last-minute bids, we show that rather
                 than seeking to bid as late as possible, a bidder
                 should try to be the first sniper to bid (i.e., it
                 should ``snipe before the snipers'').",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "bid increment; electronic commerce; Online auctions;
                 proxy bidding; sniping",
}

@Article{Serrano:2007:DSW,
  author =       "M. {\'A}ngeles Serrano and Ana Maguitman and
                 Mari{\'a}n Bogu{\~n}{\'a} and Santo Fortunato and
                 Alessandro Vespignani",
  title =        "Decoding the structure of the {WWW}: {A} comparative
                 analysis of {Web} crawls",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255442",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The understanding of the immense and intricate
                 topological structure of the World Wide Web (WWW) is a
                 major scientific and technological challenge. This has
                 been recently tackled by characterizing the properties
                 of its representative graphs, in which vertices and
                 directed edges are identified with Web pages and
                 hyperlinks, respectively. Data gathered in large-scale
                 crawls have been analyzed by several groups resulting
                 in a general picture of the WWW that encompasses many
                 of the complex properties typical of rapidly evolving
                 statistical analysis of the topological properties of
                 four different WWW graphs obtained with different
                 crawlers. We find that, despite the very large size of
                 the samples, the statistical measures characterizing
                 these graphs differ quantitatively, and in some cases
                 qualitatively, depending on the domain analyzed and the
                 crawl used for gathering the data. This spurs the issue
                 of the presence of sampling biases and structural
                 differences of Web crawls that might induce properties
                 not representative of the actual global underlying
                 graph. In short, the stability of the widely accepted
                 statistical description of the Web is called into
                 question. In order to provide a more accurate
                 characterization of the Web graph, we study statistical
                 measures beyond the degree distribution, such as
                 degree-degree correlation functions or the statistics
                 of reciprocal connections. The latter appears to
                 enclose the relevant correlations of the WWW graph and
                 carry most of the topological information of the Web.
                 The analysis of this quantity is also of major interest
                 in relation to the navigability and searchability of
                 the Web.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "crawler biases; statistical analysis; Web graph
                 structure; Web measurement",
}

@Article{Reis:2007:BVD,
  author =       "Charles Reis and John Dunagan and Helen J. Wang and
                 Opher Dubrovsky and Saher Esmeir",
  title =        "{BrowserShield}: {Vulnerability}-driven filtering of
                 dynamic {HTML}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281481",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Vulnerability-driven filtering of network data can
                 offer a fast and easy-to-deploy alternative or
                 intermediary to software patching, as exemplified in
                 Shield's vision to a new domain, inspecting and
                 cleansing not just static content, but also dynamic
                 content. The dynamic content we target is the dynamic
                 HTML in Web pages, which have become a popular vector
                 for attacks. The key challenge in filtering dynamic
                 HTML is that it is undecidable to statically determine
                 whether an embedded script will exploit the browser at
                 runtime. We avoid this undecidability problem by
                 rewriting web pages and any embedded scripts into safe
                 equivalents, inserting checks so that the filtering is
                 done at runtime. The rewritten pages contain logic for
                 recursively applying runtime checks to dynamically
                 generated or modified web content, based on known
                 vulnerabilities. We have built and evaluated {\em
                 BrowserShield}, a general framework that performs this
                 dynamic instrumentation of embedded scripts, and that
                 admits policies for customized runtime actions like
                 vulnerability-driven filtering. We also explore other
                 applications on top of BrowserShield.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "code rewriting; JavaScript; vulnerability; Web
                 browser",
}

@Article{Sun:2007:MDW,
  author =       "Zan Sun and Jalal Mahmud and I. V. Ramakrishnan and
                 Saikat Mukherjee",
  title =        "Model-directed {Web} transactions under constrained
                 modalities",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281482",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Online transactions (e.g., buying a book on the Web)
                 typically involve a number of steps spanning several
                 pages. Conducting such transactions under constrained
                 interaction modalities as exemplified by small screen
                 handhelds or interactive speech interfaces --- the
                 primary mode of communication for visually impaired
                 individuals --- is a strenuous, fatigue-inducing
                 activity. But usually one needs to browse only a small
                 fragment of a Web page to perform a transactional step
                 such as a form fillout, selecting an item from a search
                 results list, and so on. We exploit this observation to
                 develop an automata-based process model that delivers
                 only the ``relevant'' page fragments at each
                 transactional step, thereby reducing information
                 overload on such narrow interaction bandwidths. We
                 realize this model by coupling techniques from content
                 analysis of Web documents, automata learning and
                 statistical classification. The process model and
                 associated techniques have been incorporated into
                 Guide-O, a prototype system that facilitates online
                 transactions using speech/keyboard interface
                 (Guide-O-Speech), or with limited-display size
                 handhelds (Guide-O-Mobile). Performance of Guide-O and
                 its user experience are reported.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "assistive device; content adaption; Web transaction",
}

@Article{Sharman:2007:CAO,
  author =       "Raj Sharman and Shiva Shankar Ramanna and Ram Ramesh
                 and Ram Gopal",
  title =        "Cache architecture for on-demand streaming on the
                 {Web}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281483",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "On-demand streaming from a remote server through
                 best-effort Internet poses several challenges because
                 of network losses and variable delays. The primary
                 technique used to improve the quality of distributed
                 content service is replication. In the context of the
                 Internet, Web caching is the traditional mechanism that
                 delivery model for a distributed architecture in which
                 video is streamed from remote servers to edge caches
                 where the video is buffered and then streamed to the
                 client through a last-mile connection. The model uses a
                 novel revolving indexed cache buffer management
                 mechanism at the edge cache and employs selective
                 retransmissions of lost packets between the remote and
                 edge cache for a best-effort recovery of the losses.
                 The new Web cache buffer management scheme includes a
                 dynamic adjustment of cache buffer parameters based on
                 network conditions. In addition, performance of buffer
                 management and retransmission policies at the edge
                 cache is modeled and assessed using a probabilistic
                 analysis of the streaming process as well as system
                 simulations. The influence of different endogenous
                 control parameters on the quality of stream received by
                 the client is studied. Calibration curves on the QoS
                 metrics for different network conditions have been
                 obtained using simulations. Edge cache management can
                 be done using these calibration curves. ISPs can make
                 use of calibration curves to set the values of the
                 endogenous control parameters for specific QoS in
                 real-time streaming operations based on network
                 conditions. A methodology to benchmark transmission
                 characteristics using real-time traffic data is
                 developed to enable effective decision making on edge
                 cache buffer allocation and management strategies.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "buffering; edge cache; on-demand streaming; quality of
                 service; selective retransmissions; Web caching",
}

@Article{Zdun:2007:MPD,
author =       "Uwe Zdun and Carsten Hentrich and Schahram Dustdar",
title =        "Modeling process-driven and service-oriented
architectures using patterns and pattern primitives",
journal =      j-TWEB,
volume =       "1",
number =       "3",
pages =        "14:1--14:??",
month =        sep,
year =         "2007",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1281480.1281484",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Service-oriented architectures are increasingly used
in the context of business processes. However, the
proven practices for process-oriented integration of
services are not well documented yet. In addition,
modeling approaches for the integration of processes
and services are neither mature nor do they exactly
propose a pattern language for process-oriented
integration of services to describe the proven
practices. Our main contribution is a modeling concept
based on pattern primitives for these patterns. A
pattern primitive is a fundamental, precisely specified
modeling element that represents a pattern. We present
a catalog of pattern primitives that are precisely
modeled using OCL constraints and map these primitives
to the patterns in the pattern language of
process-oriented integration of services. We also
present a model validation tool that we have developed
to support modeling the process-oriented integration of
services, and an industrial case study in which we have
applied our results.",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "middleware; Service-oriented architecture; software
patterns",
}

@Article{Najork:2008:ISS,
author =       "Marc Najork and Brian D. Davison",
title =        "Introduction to special section on adversarial issues
in {Web} search",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "1:1--1:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326562",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Becchetti:2008:LAW,
author =       "Luca Becchetti and Carlos Castillo and Debora Donato
and Ricardo Baeza-Yates and Stefano Leonardi",
title =        "Link analysis for {Web} spam detection",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "2:1--2:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326563",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We propose link-based techniques for automatic
detection of Web spam, a term referring to pages which
use deceptive techniques to obtain undeservedly high
scores in search engines. The use of Web spam is
widespread and difficult to solve, mostly due to the
large size of the Web which means that, in practice,
many algorithms are infeasible.\par

We perform a statistical analysis of a large collection
of Web pages. In particular, we compute statistics of
the links in the vicinity of every Web page applying
rank propagation and probabilistic counting over the
entire Web graph in a scalable way. These statistical
features are used to build Web spam classifiers which
only consider the link structure of the Web, regardless
of page contents. We then present a study of the
performance of each of the classifiers alone, as well
as their combined performance, by testing them over a
large collection of Web link spam. After tenfold
cross-validation, our best classifiers have a
performance comparable to that of state-of-the-art spam
classifiers that use content attributes, but are
orthogonal to content-based methods.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Urvoy:2008:TWS,
author =       "Tanguy Urvoy and Emmanuel Chauveau and Pascal Filoche
and Thomas Lavergne",
title =        "Tracking {Web} spam with {HTML} style similarities",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "3:1--3:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326564",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Automatically generated content is ubiquitous in the
web: dynamic sites built using the three-tier paradigm
are good examples (e.g., commercial sites, blogs and
other sites edited using web authoring software), as
well as less legitimate spamdexing attempts (e.g., link
farms, faked directories).\par

Those pages built using the same generating method
(template or script) share a common ``look and feel''
that is not easily detected by common text
classification methods, but is more related to
stylometry.\par

In this work we study and compare several HTML style
similarity measures based on both textual and
extra-textual features in HTML source code. We also
propose a flexible algorithm to cluster a large
collection of documents according to these measures.
Since the proposed algorithm is based on locality
sensitive hashing (LSH), we first review this
technique.\par

We then describe how to use the HTML style similarity
clusters to pinpoint dubious pages and enhance the
quality of spam classifiers. We present an evaluation
of our algorithm on the WEBSPAM-UK2006 dataset.",
acknowledgement = ack-nhfb,
articleno =    "3",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Clustering; document similarity; search engine spam;
stylometry; templates identification",
}

@Article{Lin:2008:DST,
author =       "Yu-Ru Lin and Hari Sundaram and Yun Chi and Junichi
Tatemura and Belle L. Tseng",
title =        "Detecting splogs via temporal dynamics using
self-similarity analysis",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "4:1--4:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326565",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "This paper focuses on spam blog
(splog) detection using temporal and structural
regularity of content, post time and links. Splogs are
undesirable blogs meant to attract search engine
traffic, used solely for promoting affiliate sites.
Blogs represent popular online media, and splogs not
only degrade the quality of search engine results, but
also waste network resources. The splog detection
problem is made difficult due to the lack of stable
content descriptors.\par

We have developed a new technique for detecting splogs,
based on the observation that a blog is a dynamic,
growing sequence of entries (or posts) rather than a
collection of individual pages. In our approach, splogs
are recognized by their temporal characteristics and
content. There are three key ideas in our splog
detection framework. (a) We represent the blog temporal
dynamics using self-similarity matrices defined on the
histogram intersection similarity measure of the time,
content, and link attributes of posts, to investigate
the temporal changes of the post sequence. (b) We study
the blog temporal characteristics using a visual
representation derived from the self-similarity
measures. The visual signature reveals correlation
between attributes and posts, depending on the type of
blogs (normal blogs and splogs). (c) We propose two
types of novel temporal features to capture the splog
temporal characteristics. In our splog detector, these
novel features are combined with content based
features. We extract a content based feature vector from
the blog. The dimensionality of the feature vector is
reduced by Fisher linear discriminant analysis. We have
tested an SVM-based splog detector using proposed
features on real world datasets, with appreciable
results (90\% accuracy).",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Blogs; regularity; self-similarity; spam; splog
detection; temporal dynamics; topology",
}

@Article{Weinreich:2008:QAE,
author =       "Harald Weinreich and Hartmut Obendorf and Eelco Herder
and Matthias Mayer",
title =        "Not quite the average: {An} empirical study of {Web}
use",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "5:1--5:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326566",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In the past decade, the World Wide Web has been
subject to dramatic changes. Web sites have evolved
from static information resources to dynamic and
interactive applications that are used for a broad
scope of activities on a daily basis. To examine the
consequences of these changes on user behavior, we
conducted a long-term client-side Web usage study with
twenty-five participants. This report presents results
of this study and compares the user behavior with
previous long-term browser usage studies, which range
in age from seven to thirteen years. Based on the
empirical data and the interview results, various
implications for the interface design of browsers and
Web sites are discussed.\par

A major finding is the decreasing prominence of
backtracking in Web navigation. This can largely be
attributed to the increasing importance of dynamic,
service-oriented Web sites. Users do not navigate on
these sites searching for information, but rather
interact with an online application to complete certain
tasks. Furthermore, the usage of multiple windows and
tabs has partly replaced back button usage, posing new
challenges for user orientation and backtracking. We
found that Web browsing is a rapid activity even for
pages with substantial content, which calls for page
designs that allow for cursory reading. Click maps
provide additional information on how users interact
with the Web on page level. Finally, substantial
differences were observed between users, and
characteristic usage patterns for different types of
Web sites emphasize the need for more adaptive and
customizable Web browsers.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "browser interfaces; hypertext; Navigation; usability;
user study; Web; web browsing; web design; WWW",
}

@Article{Yu:2008:FWS,
author =       "Qi Yu and Athman Bouguettaya",
title =        "Framework for {Web} service query algebra and
optimization",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "6:1--6:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326567",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We present a query algebra that supports optimized
access of Web services through service-oriented
queries. The service query algebra is defined based on
a formal service model that provides a high-level
abstraction of Web services across an application
domain. The algebra defines a set of algebraic
operators. Algebraic service queries can be formulated
using these operators. This allows users to query their
desired services based on both functionality and
quality. We provide the implementation of each
algebraic operator. This enables the generation of
Service Execution Plans (SEPs) that can be used by
users to directly access services. We present an
optimization algorithm by extending the Dynamic
Programming (DP) approach to efficiently select the
SEPs with the best user-desired quality. The
experimental study validates the proposed algorithm by
demonstrating significant performance improvement
compared with the traditional DP approach.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "query optimization; service oriented computing;
service query; Web service",
}

@Article{Aleman-Meza:2008:SSA,
author =       "Boanerges Aleman-Meza and Meenakshi Nagarajan and Li
Ding and Amit Sheth and I. Budak Arpinar and Anupam
Joshi and Tim Finin",
title =        "Scalable semantic analytics on social networks for
addressing the problem of conflict of interest
detection",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "7:1--7:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326568",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "This paper demonstrates the use of
semantic techniques for detection of Conflict of
Interest (COI). We explain the common challenges
involved in building scalable Semantic Web
applications addressing connecting-the-dots problems.
We describe in detail the
challenges involved in two important aspects on
building Semantic Web applications, namely, data
acquisition and entity disambiguation (or reference
reconciliation). We extend upon our previous work where
we integrated the collaborative network of a subset of
DBLP researchers with persons in a Friend-of-a-Friend
social network (FOAF). Our method finds the connections
between people, measures collaboration strength, and
includes heuristics that use friendship/affiliation
information to provide an estimate of potential COI in
a peer-review scenario. Evaluations are presented by
measuring what could have been the COI between accepted
papers in various conference tracks and their
respective program committee members. The experimental
results demonstrate that scalability can be achieved by
using a dataset of over 3 million entities (all
bibliographic data from DBLP and a large collection of
FOAF documents).",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "conflict of interest; data fusion; DBLP; entity
disambiguation; ontologies; peer review process; RDF;
semantic analytics; semantic associations; Semantic
Web; social networks; swetoDblp",
}

@Article{Gmach:2008:AQS,
author =       "Daniel Gmach and Stefan Krompass and Andreas Scholz
and Martin Wimmer and Alfons Kemper",
title =        "Adaptive quality of service management for enterprise
services",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "8:1--8:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326569",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In the past, enterprise resource planning systems were
designed as monolithic software systems running on
centralized mainframes. Today, these systems are
(re-)designed as a repository of enterprise services
that are distributed throughout the available computing
infrastructure. These service oriented architectures
require adaptive management concepts in order to achieve a high quality
of service level in terms of, for example,
availability, responsiveness, and throughput. The
adaptive management has to allocate service instances
to computing resources, adapt the resource allocation
to unforeseen load fluctuations, and intelligently
schedule individual requests to guarantee negotiated
service level agreements (SLAs). Our AutoGlobe platform
provides such a comprehensive adaptive service
management comprising\par

--- static service-to-server allocation based on
automatically detected service utilization
patterns,\par

--- adaptive service management based on a fuzzy
controller that remedies exceptional situations by
automatically initiating, for example, service
migration, service replication (scale-out), and\par

--- adaptive scheduling of individual service requests
that prioritizes requests depending on the current
degree of service level conformance.\par

All three complementary control components are
described in detail, and their effectiveness is
analyzed by means of realistic business application
scenarios.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "fuzzy controller; Quality of service; workload
characterization",
}

@Article{Yang:2008:DGN,
author =       "Bo Yang and Jiming Liu",
title =        "Discovering global network communities based on local
centralities",
journal =      j-TWEB,
volume =       "2",
number =       "1",
pages =        "9:1--9:??",
month =        feb,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1326561.1326570",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "One of the central problems in studying and
understanding complex networks, such as online social
networks or World Wide Web, is to discover hidden,
either physically (e.g., interactions or hyperlinks) or
logically (e.g., profiles or semantics) well-defined
topological structures. From a practical point of view,
a good example of such structures would be so-called
network communities. Earlier studies have introduced
various formulations as well as methods for the problem
of identifying or extracting communities. While each of
them has pros and cons as far as the effectiveness and
efficiency are concerned, almost none of them has
explicitly dealt with the potential relationship
between the global topological property of a network
and the local property of individual nodes. In order to
study this problem, this paper presents a new
algorithm, called ICS, which aims to discover natural
network communities by inferring from the local
information of nodes inherently hidden in networks
based on a new centrality, that is, clustering
centrality, which is a generalization of eigenvector
centrality. As compared with existing methods, our
method runs efficiently with a good clustering
performance. Additionally, it is insensitive to its
built-in parameters and prior knowledge.",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "centrality; community mining; Complex network; graph
theory; World Wide Web",
}

@Article{Dustdar:2008:ISI,
author =       "Schahram Dustdar and Bernd J. Kr{\"a}mer",
title =        "Introduction to special issue on service oriented
computing {(SOC)}",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "10:1--10:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1346237.1346238",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "10",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Belhajjame:2008:AAW,
author =       "Khalid Belhajjame and Suzanne M. Embury and Norman W.
Paton and Robert Stevens and Carole A. Goble",
title =        "Automatic annotation of {Web} services based on
workflow definitions",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "11:1--11:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/1346237.1346239",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Semantic annotations of web services can support the
effective and efficient discovery of services, and
guide their composition into workflows. At present,
however, the practical utility of such annotations is
limited by the small number of service annotations
available for general use. Manual annotation of
services is a time consuming and thus expensive task,
so some means are required by which services can be
automatically (or semi-automatically) annotated. In
this paper, we show how information can be inferred
about the semantics of operation parameters based on
their connections to other (annotated) operation
parameters within tried-and-tested workflows. Because
the data links in the workflows do not necessarily
contain every possible connection of compatible
parameters, we can infer only constraints on the
semantics of parameters. We show that despite their
imprecise nature these so-called {\em loose
annotations\/} are still of value in supporting the
manual annotation task, inspecting workflows and
discovering services. We also show that derived
annotations for already annotated parameters are
useful. By comparing existing and newly derived
annotations of operation parameters, we can support the
detection of errors in existing annotations, the
ontology used for annotation and in workflows. The
derivation mechanism has been implemented, and its
practical applicability for inferring new annotations
has been established through an experimental
evaluation. The usefulness of the derived annotations
is also demonstrated.",
acknowledgement = ack-nhfb,
articleno =    "11",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "automatic annotation; ontologies; semantic
annotations; Semantic web services; workflows",
}

@Article{Elgedawy:2008:CAH,
author =       "Islam Elgedawy and Zahir Tari and James A. Thom",
title =        "Correctness-aware high-level functional matching
approaches for semantic {Web} services",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "12:1--12:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/1346237.1346240",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Service matching approaches trade precision for
recall, creating the need for users to choose the
correct services, which obviously is a major obstacle
for automating the service discovery and aggregation
processes. Our approach to overcome this problem, is to
eliminate the appearance of false positives by
returning only the correct services. As different users
have different semantics for what is correct, we argue
that the correctness of the matching results must be
determined according to the achievement of users'
goals: that only services achieving users' goals are
considered correct. To determine such correctness, we
argue that the matching process should be based
primarily on the high-level functional specifications
(namely goals, achievement contexts, and external
behavior). We propose the models, data
structures, algorithms, and theorems required to
correctly match such specifications. We propose a model
called $G^+$, to capture such specifications, for
both services and users, in a machine-understandable
format. We propose a data structure, called a Concepts
Substitutability Graph (CSG), to capture the
substitution semantics of application domain concepts
in a context-based manner, in order to determine the
semantic-preserving mapping transformations required to
match different {\em G\/}$^+$ models. We also propose a
behavior matching approach that is able to match states
in an m-to-n manner, such that behavior models with
different numbers of state transitions can be matched.
Finally, we show how services are matched and
aggregated according to their $G^+$ models. Results
of supporting experiments demonstrate the advantages of
the proposed service matching approaches.",
acknowledgement = ack-nhfb,
articleno =    "12",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "High-level functional matching; semantic Web services;
service aggregation",
}

@Article{Ryu:2008:SDE,
author =       "Seung Hwan Ryu and Fabio Casati and Halvard Skogsrud
and Boualem Benatallah and R{\'e}gis Saint-Paul",
title =        "Supporting the dynamic evolution of {Web} service
protocols in service-oriented architectures",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "13:1--13:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/1346237.1346241",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In service-oriented architectures, everything is a
service and everyone is a service provider. Web
services (or simply services) are loosely coupled
software components that are published, discovered, and
invoked across the Web. As the use of Web service
grows, in order to correctly interact with them, it is
important to understand the business protocols that
provide clients with the information on how to interact
with services. In dynamic Web service environments,
service providers need to constantly adapt their
business protocols for reflecting the restrictions and
requirements proposed by new applications, new business
strategies, and new laws, or for fixing problems found
in the protocol definition. However, the effective
management of such a protocol evolution raises critical
problems: one of the most critical issues is how to
handle instances running under the old protocol when it
has been changed. Simple solutions, such as aborting
them or allowing them to continue to run according to
the old protocol, can be considered, but they are
inapplicable for many reasons (for example, the loss of
work already done and the critical nature of work). In
this paper, we present a framework that supports
service managers in managing the business protocol
evolution by providing several features, such as a
variety of protocol change impact analyses
automatically determining which ongoing instances can
be migrated to the new version of protocol, and data
mining techniques inferring interaction patterns used
for classifying ongoing instances migrateable to the
new protocol. To support the protocol evolution
process, we have also developed database-backed GUI
tools on top of our existing system. The proposed
approach and tools can help service managers in
managing the evolution of ongoing instances when the
business protocols of services with which they are
interacting have changed.",
acknowledgement = ack-nhfb,
articleno =    "13",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Business protocols; change impact analysis; decision
trees; dynamic evolution; ongoing instances; Web
services",
}

@Article{Schafer:2008:EFA,
author =       "Michael Sch{\"a}fer and Peter Dolog and Wolfgang
Nejdl",
title =        "An environment for flexible advanced compensations of
{Web} service transactions",
journal =      j-TWEB,
volume =       "2",
number =       "2",
pages =        "14:1--14:??",
month =        apr,
year =         "2008",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/1346237.1346242",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Business transactions are increasingly
performed by employing Web service environments.
Moreover, such environments are being provided by major
players on the technology markets. Those environments
are based on open specifications for transaction
coordination. When a failure in such an environment
occurs, a compensation can be initiated to recover from
the failure. However, current environments have only
limited capabilities for compensations, and are usually
restricted to backward recovery. In this paper, we
introduce an environment to deal with advanced
compensations based on forward recovery principles. We
extend the existing Web service transaction
coordination architecture and infrastructure in order
to support flexible compensation operations. We use a
contract-based approach, which allows the specification
of permitted compensations at runtime. We introduce
{\em abstract service\/} and {\em adapter\/}
components, which allow us to separate the compensation
logic from the coordination logic. In this way, we can
easily plug in or plug out different compensation
strategies based on a specification language defined on
top of basic compensation activities and complex
compensation types. Experiments with our approach and
environment show that such an approach to compensation
is feasible and beneficial. Additionally, we introduce
a cost-benefit model to evaluate the proposed
environment based on net value analysis. The evaluation
shows in which circumstances the environment is
economical.",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "compensations; forward-recovery; transactions; Web
services",
}

@Article{Srivatsa:2008:MAL,
author =       "Mudhakar Srivatsa and Arun Iyengar and Jian Yin and
Ling Liu",
title =        "Mitigating application-level denial of service attacks
on {Web} servers: {A} client-transparent approach",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "15:1--15:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1377488.1377489",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Recently, we have seen increasing numbers of denial of
service (DoS) attacks against online services and Web
applications either for extortion reasons or for
impairing and even disabling the competition. These DoS
attacks have increasingly targeted the application
level. Application-level DoS attacks emulate the same
request syntax and network-level traffic
characteristics as those of legitimate clients, thereby
making the attacks much harder to detect and counter.
Moreover, such attacks often target bottleneck
resources such as disk bandwidth, database bandwidth,
DoS attacks by using a twofold mechanism. First, we
perform admission control to limit the number of
concurrent clients served by the online service.
Admission control is based on port hiding that renders
the online service invisible to unauthorized clients by
hiding the port number on which the service accepts
incoming requests. Second, we perform congestion
control on admitted clients to allocate more resources
to good clients. Congestion control is achieved by
adaptively setting a client's priority level in
response to the client's requests in a way that can
incorporate application-level semantics. We present a
detailed evaluation of the proposed solution using two
sample applications: Apache HTTPD and the TPCW
benchmark (running on Apache Tomcat and IBM DB2). Our
experiments show that the proposed solution incurs low
performance overhead and is resilient to DoS attacks.",
acknowledgement = ack-nhfb,
articleno =    "15",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "client transparency; DoS Attacks; game theory; Web
servers",
}

@Article{White:2008:LPD,
author =       "Ryen W. White and Mikhail Bilenko and Silviu
Cucerzan",
title =        "Leveraging popular destinations to enhance {Web}
search interaction",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "16:1--16:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1377488.1377490",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "feature that for a given query provides links to Web
sites frequently visited by other users with similar
information needs. These popular destinations
complement traditional search results, allowing direct
navigation to authoritative resources for the query
topic. Destinations are identified using the history of
the search and browsing behavior of many users over an
extended time period, and their collective behavior
provides a basis for computing source authority. They
are drawn from the end of users' postquery browse
trails where users may cease searching once they find
relevant information. We describe a user study that
compared the suggestion of destinations with the
previously proposed suggestion of related queries as
well as with traditional, unaided Web search. Results
show that search enhanced by query suggestions
outperforms other systems in terms of subject
perceptions and search effectiveness for fact-finding
search tasks. However, search enhanced by destination
suggestions performs best for exploratory tasks with
its best performance obtained from mining past user
behavior at query-level granularity. We discuss the
implications of these and other findings from our study
for the design of search systems that utilize user
behavior, in particular, user browse trails and popular
destinations.",
acknowledgement = ack-nhfb,
articleno =    "16",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "enhanced Web search; search destinations; User
studies",
}

@Article{Andreolini:2008:MFS,
author =       "Mauro Andreolini and Sara Casolari and Michele
Colajanni",
title =        "Models and framework for supporting runtime decisions
in {Web-based} systems",
journal =      j-TWEB,
volume =       "2",
number =       "3",
pages =        "17:1--17:??",
month =        jul,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1377488.1377491",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Efficient management of distributed Web-based systems
requires several mechanisms that decide on request
redirection. The algorithms behind these mechanisms
typically make fast decisions on the basis of the load
conditions of the system resources. The architecture
complexity and workloads characterizing most Web-based
services make it extremely difficult to deduce a
representative view of a resource load from collected
measures that show extreme variability even at
different time scales. Hence, any decision based on
instantaneous or average views of the system load may
lead to useless or even wrong actions. As an
alternative, we propose a two-phase strategy that first
aims to obtain a representative view of the load trend
from measured system values and then applies this
representation to support runtime decision systems. We
consider two classical problems behind decisions: how
to detect significant and nontransient load changes of
a system resource and how to predict its future load
behavior. The two-phase strategy is based on stochastic
functions that are characterized by a computational
complexity that is compatible with runtime decisions.
We describe, test, and tune the two-phase strategy by
considering as a first example a multitier Web-based
system that is subject to different classes of
realistic and synthetic workloads. Also, we integrate
the proposed strategy into a framework that we validate
by applying it to support runtime decisions in a
cluster Web system and in a locally distributed Network
Intrusion Detection System.",
acknowledgement = ack-nhfb,
articleno =    "17",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "prediction; load representation; World Wide Web",
}

@Article{Amitay:2008:ISI,
author =       "Einat Amitay and Andrei Broder",
title =        "Introduction to special issue on query log analysis:
{Technology} and ethics",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "18:1--18:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1409220.1409221",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
acknowledgement = ack-nhfb,
articleno =    "18",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cooper:2008:SQL,
author =       "Alissa Cooper",
title =        "A survey of query log privacy-enhancing techniques
from a policy perspective",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "19:1--19:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1409220.1409222",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "As popular search engines face the sometimes
conflicting interests of protecting privacy while
retaining query logs for a variety of uses, numerous
technical measures have been suggested to both enhance
privacy and preserve at least a portion of the utility
these techniques against three sets of criteria: (1)
how well the technique protects privacy, (2) how well
the technique preserves the utility of the query logs,
and (3) how well the technique might be implemented as
a user control. A user control is defined as a
mechanism that allows individual Internet users to
choose to have the technique applied to their own query
logs.",
acknowledgement = ack-nhfb,
articleno =    "19",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "log; policy; Privacy; query; search",
}

@Article{Baeza-Yates:2008:DTO,
author =       "Ricardo Baeza-Yates and Aristides Gionis and Flavio P.
Junqueira and Vanessa Murdock and Vassilis Plachouras
and Fabrizio Silvestri",
title =        "Design trade-offs for search engine caching",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "20:1--20:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1409220.1409223",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "efficient caching systems for Web search engines. We
explore the impact of different approaches, such as
static vs. dynamic caching, and caching query results
vs. caching posting lists. Using a query log spanning a
whole year, we explore the limitations of caching and
we demonstrate that caching posting lists can achieve
higher hit rates than caching query answers. We propose
a new algorithm for static caching of posting lists,
which outperforms previous methods. We also study the
problem of finding the optimal way to split the static
cache between answers and posting lists. Finally, we
effectiveness of static caching, given our observation
that the distribution of the queries changes slowly
over time. Our results and observations are applicable
to different levels of the data-access hierarchy, for
instance, for a memory/disk layer or a broker/remote
server layer.",
acknowledgement = ack-nhfb,
articleno =    "20",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Caching; query logs; Web search",
}

@Article{Richardson:2008:LAW,
author =       "Matthew Richardson",
title =        "Learning about the world through long-term query
logs",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "21:1--21:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1409220.1409224",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "query logs. Most work on query logs to date considers
only short-term (within-session) query information. In
contrast, we show that long-term query logs can be used
to learn about the world we live in. There are many
applications of this that lead not only to improving
the search engine for its users, but also potentially
to advances in other disciplines such as medicine,
will show how long-term query logs can be used for
these purposes, and that their potential is severely
reduced if the logs are limited to short time horizons.
We show that query effects are long-lasting, provide
valuable information, and might be used to
automatically make medical discoveries, build concept
hierarchies, and generally learn about the sociological
behavior of users. We believe these applications are
only the beginning of what can be done with the
information contained in long-term query logs, and see
this work as a step toward unlocking their potential.",
acknowledgement = ack-nhfb,
articleno =    "21",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "data mining; knowledge discovery; query logs; user
behavior",
}

@Article{Koutrika:2008:CST,
author =       "Georgia Koutrika and Frans Adjie Effendi and
Zolt{\'a}n Gy{\"o}ngyi and Paul Heymann and Hector
Garcia-Molina",
title =        "Combating spam in tagging systems: {An} evaluation",
journal =      j-TWEB,
volume =       "2",
number =       "4",
pages =        "22:1--22:??",
month =        oct,
year =         "2008",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1409220.1409225",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Tagging systems allow users to interactively annotate
a pool of shared resources using descriptive strings
called {\em tags}. Tags are used to guide users to
interesting resources and help them build communities
that share their expertise and resources. As tagging
systems are gaining in popularity, they become more
susceptible to {\em tag spam\/}: misleading tags that
are generated in order to increase the visibility of
some resources or simply to confuse users. Our goal is
to understand this problem better. In particular, we
are interested in answers to questions such as: How
many malicious users can a tagging system tolerate
before results significantly degrade? What types of
tagging systems are more vulnerable to malicious
attacks? What would be the effort and the impact of
employing a trusted moderator to find bad postings? Can
a system automatically protect itself from spam, for
instance, by exploiting user tag patterns? In a quest
for answers to these questions, we introduce a
framework for modeling tagging systems and user tagging
behavior. We also describe a method for ranking
documents matching a tag based on taggers' reliability.
Using our framework, we study the behavior of existing
approaches under malicious attacks and the impact of a
moderator and our ranking method.",
acknowledgement = ack-nhfb,
articleno =    "22",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "bookmarking systems; tag spam; Tagging; tagging
models",
}

@Article{Rattenbury:2009:MEP,
author =       "Tye Rattenbury and Mor Naaman",
title =        "Methods for extracting place semantics from {Flickr}
tags",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "1:1--1:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1462148.1462149",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We describe an approach for extracting semantics for
tags, unstructured text-labels assigned to resources on
the Web, based on each tag's usage patterns. In
particular, we focus on the problem of extracting place
semantics for tags that are assigned to photos on
Flickr, a popular-photo sharing Web site that supports
location (latitude/longitude) metadata for photos. We
propose the adaptation of two baseline methods,
inspired by well-known burst-analysis techniques, for
the task; we also describe two novel methods, TagMaps
and scale-structure identification. We evaluate the
methods on a subset of Flickr data. We show that our
scale-structure identification method outperforms
existing techniques and that a hybrid approach
generates further improvements (achieving 85\%
precision at 81\% recall). The approach and methods
described in this work can be used in other domains
such as geo-annotated Web pages, where text terms can
be extracted and associated with usage patterns.",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "places; semantics; Tagging systems; tags",
}

@Article{Jackson:2009:PBD,
author =       "Collin Jackson and Adam Barth and Andrew Bortz and
Weidong Shao and Dan Boneh",
title =        "Protecting browsers from {DNS} rebinding attacks",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "2:1--2:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1462148.1462150",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "DNS rebinding attacks subvert the same-origin policy
of browsers, converting them into open network proxies.
Using DNS rebinding, an attacker can circumvent
organizational and personal firewalls, send spam email,
and defraud pay-per-click advertisers. We evaluate the
cost effectiveness of mounting DNS rebinding attacks,
finding that an attacker requires less than \$100 to
hijack 100,000 IP addresses. We analyze defenses to DNS
rebinding attacks, including improvements to the
classic ``DNS pinning,'' and recommend changes to
browser plug-ins, firewalls, and Web servers. Our
defenses have been adopted by plug-in vendors and by a
number of open-source firewall implementations.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "click fraud; DNS; firewall; Same-origin policy; spam",
}

@Article{Bar-Yossef:2009:DCD,
author =       "Ziv Bar-Yossef and Idit Keidar and Uri Schonfeld",
title =        "Do not crawl in the {DUST}: {Different URLs with
Similar Text}",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "3:1--3:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1462148.1462151",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We consider the problem of DUST: Different URLs with
Similar Text. Such duplicate URLs are prevalent in Web
sites, as Web server software often uses aliases and
redirections, and dynamically generates the same page
from various different URL requests. We present a novel
algorithm, {\em DustBuster}, for uncovering DUST; that
is, for discovering rules that transform a given URL to
others that are likely to have similar content.
DustBuster mines DUST effectively from previous crawl
logs or Web server logs, {\em without\/} examining page
contents. Verifying these rules via sampling requires
fetching few actual Web pages.
Search engines can benefit from information about DUST
to increase the effectiveness of crawling, reduce
indexing overhead, and improve the quality of
popularity statistics such as PageRank.",
acknowledgement = ack-nhfb,
articleno =    "3",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "antialiasing; crawling; duplicate detection; Search
engines; URL normalization",
}

@Article{Xiao:2009:BSD,
author =       "Xiangye Xiao and Qiong Luo and Dan Hong and Hongbo Fu
and Xing Xie and Wei-Ying Ma",
title =        "Browsing on small displays by transforming {Web}
pages into hierarchically structured subpages",
journal =      j-TWEB,
volume =       "3",
number =       "1",
pages =        "4:1--4:??",
month =        jan,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1462148.1462152",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We propose a new Web page transformation method to
facilitate Web browsing on handheld devices such as
Personal Digital Assistants (PDAs). In our approach, an
original Web page that does not fit on the screen is
transformed into a set of subpages, each of which fits
on the screen. This transformation is done through
slicing the original page into page blocks iteratively,
with several factors considered. These factors include
the size of the screen, the size of each page block,
the number of blocks in each transformed page, the
depth of the tree hierarchy that the transformed pages
form, as well as the semantic coherence between blocks.
We call the tree hierarchy of the transformed pages an
SP-tree. In an SP-tree, an internal node consists of a
textually enhanced thumbnail image with hyperlinks, and
a leaf node is a block extracted from a subpage of the
original Web page.
We adaptively adjust the fanout and the height of the
SP-tree so that each thumbnail image is clear enough
for users to read, while at the same time, the number
of clicks needed to reach a leaf page is few. Through
this transformation algorithm, we preserve the
contextual information in the original Web page and
reduce scrolling. We have implemented this
transformation module on a proxy server and have
conducted usability studies on its performance. Our
system achieved a shorter task completion time compared
with that of transformations from the Opera browser in
nine of ten tasks. The average improvement on familiar
pages was 44\%. The average improvement on unfamiliar
pages was 37\%. Subjective responses were positive.",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Proxy; slicing tree; small displays; thumbnails; Web
browsing; Web page adaptation",
}

@Article{Gabrilovich:2009:CSQ,
author =       "Evgeniy Gabrilovich and Andrei Broder and Marcus
Fontoura and Amruta Joshi and Vanja Josifovski and
Lance Riedel and Tong Zhang",
title =        "Classifying search queries using the {Web} as a
source of knowledge",
journal =      j-TWEB,
volume =       "3",
number =       "2",
pages =        "5:1--5:??",
month =        apr,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1513876.1513877",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "We propose a methodology for building a robust query
classification system that can identify thousands of
query classes, while dealing in real time with the
query volume of a commercial Web search engine. We use
a pseudo relevance feedback technique: given a query,
we determine its topic by classifying the Web search
results retrieved by the query.
Motivated by the needs of search advertising, we
primarily focus on rare queries, which are the hardest
from the point of view of machine learning, yet in
aggregate account for a considerable fraction of search
engine traffic. Empirical evaluation confirms that our
methodology yields a considerably higher classification
accuracy than previously reported. We believe that the
proposed methodology will lead to better matching of
online ads to rare queries and overall to a better user
experience.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Pseudo relevance feedback; query classification; Web
search",
}

@Article{Reay:2009:LSE,
author =       "Ian Reay and Scott Dick and James Miller",
title =        "A large-scale empirical study of {P3P} privacy
policies: {Stated} actions vs. legal obligations",
journal =      j-TWEB,
volume =       "3",
number =       "2",
pages =        "6:1--6:??",
month =        apr,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1513876.1513878",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Numerous studies over the past ten years have shown
that concern for personal privacy is a major impediment
to the growth of e-commerce. These concerns are so
serious that most if not all consumer watchdog groups
have called for some form of privacy protection for
Internet users. In response, many nations around the
world, including all European Union nations, Canada,
Japan, and Australia, have enacted national legislation
establishing mandatory safeguards for personal privacy.
However, recent evidence indicates that Web sites might
not be adhering to the requirements of this
legislation.
The goal of this study is to examine the posted privacy
policies of Web sites, and compare these statements to
the legal mandates under which the Web sites operate.
We harvested all available P3P (Platform for Privacy
Preferences Protocol) documents from the 100,000 most
popular Web sites (over 3,000 full policies, and
another 3,000 compact policies). This allows us to
undertake an automated analysis of adherence to legal
mandates on Web sites that most impact the average
Internet user. Our findings show that Web sites
generally do not even claim to follow all the
privacy-protection mandates in their legal jurisdiction
(we do not examine actual practice, only posted
policies). Furthermore, this general statement appears
to be true for every jurisdiction with privacy laws and
any significant number of P3P policies, including
European Union nations, Canada, Australia, and Web
sites in the USA Safe Harbor program.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "electronic commerce; legislation and enforcement;
P3P; Privacy protection",
}

@Article{Dourisboure:2009:ECD,
author =       "Yon Dourisboure and Filippo Geraci and Marco
Pellegrini",
title =        "Extraction and classification of dense implicit
communities in the {Web} graph",
journal =      j-TWEB,
volume =       "3",
number =       "2",
pages =        "7:1--7:??",
month =        apr,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1513876.1513879",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The World Wide Web (WWW) is rapidly becoming
important for society as a medium for sharing data,
information, and services, and there is a growing
interest in tools for understanding collective behavior
and emerging phenomena in the WWW.
In this article we focus on the problem of searching
and classifying {\em communities\/} in the Web. Loosely
speaking a community is a group of pages related to a
common interest. More formally, communities have been
associated in the computer science literature with the
existence of a locally dense subgraph of the Web graph
(where Web pages are nodes and hyperlinks are arcs of
the Web graph). The core of our contribution is a new
scalable algorithm for finding relatively dense
subgraphs in massive graphs. We apply our algorithm on
Web graphs built on three publicly available large
crawls of the Web (with raw sizes up to 120M nodes and
1G arcs). The effectiveness of our algorithm in finding
dense subgraphs is demonstrated experimentally by
embedding artificial communities in the Web graph and
counting how many of these are blindly found.
Effectiveness increases with the size and density of
the communities: it is close to 100\% for communities
of thirty nodes or more (even at low density). It is
still about 80\% even for communities of twenty nodes
with density over 50\% of the arcs present. At the
lower extremes the algorithm catches 35\% of dense
communities made of ten nodes. We also develop some
sufficient conditions for the detection of a community
under some local graph models and not-too-restrictive
hypotheses.
We complete our {\em Community Watch\/} system by
clustering the communities found in the Web graph into
homogeneous groups by topic and labeling each group by
representative keywords.",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "communities; detection of dense subgraph; Web graph",
}

@Article{Lee:2009:ISB,
author =       "Hsin-Tsang Lee and Derek Leonard and Xiaoming Wang
and Dmitri Loguinov",
title =        "{IRLbot}: {Scaling} to 6 billion pages and beyond",
journal =      j-TWEB,
volume =       "3",
number =       "3",
pages =        "8:1--8:??",
month =        jun,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1541822.1541823",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "This article shares our experience in designing a Web
crawler that can download billions of pages using a
single-server implementation and models its
performance. We first show that current crawling
algorithms cannot effectively cope with the sheer
volume of URLs generated in large crawls, highly
branching spam, legitimate multimillion-page blog
sites, and infinite loops created by server-side
scripts. We then offer a set of techniques for dealing
with these issues and test their performance in an
implementation we call IRLbot. In our recent experiment
that lasted 41 days, IRLbot running on a single server
successfully crawled 6.3 billion valid HTML pages (7.6
billion connection requests) and sustained an average
download rate of 319 mb/s (1,789 pages/s).
Unlike our prior experiments with algorithms proposed
in related work, this version of IRLbot did not
experience any bottlenecks and successfully handled
content from over 117 million hosts, parsed out 394
billion links, and discovered a subset of the Web graph
with 41 billion unique nodes.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "crawling; IRLbot; large scale",
}

@Article{Tappenden:2009:CDS,
author =       "Andrew F. Tappenden and James Miller",
title =        "Cookies: {A} deployment study and the testing
implications",
journal =      j-TWEB,
volume =       "3",
number =       "3",
pages =        "9:1--9:??",
month =        jun,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1541822.1541824",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The results of an extensive investigation of cookie
deployment amongst 100,000 Internet sites are
presented. Cookie deployment is found to be approaching
universal levels and hence there exists an associated
need for relevant Web and software engineering
processes, specifically testing strategies which
actively consider cookies. The semi-automated
investigation demonstrates that over two-thirds of the
sites studied deploy cookies. The investigation
specifically examines the use of first-party,
third-party, sessional, and persistent cookies within
Web-based applications, identifying the presence of a
P3P policy and dynamic Web technologies as major
predictors of cookie usage. The results are juxtaposed
with the lack of testing strategies present in the
literature. A number of real-world examples, including
two case studies are presented, further accentuating
the need for comprehensive testing strategies for
Web-based applications.
The use of antirandom test case generation is explored
with respect to the testing issues discussed. Finally,
a number of seeding vectors are presented, providing a
basis for testing cookies within Web-based
applications.",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
keywords =     "Cookies; Internet browser; software testing; Web
engineering; Web technologies",
}

@Article{Comuzzi:2009:FQB,
author =       "Marco Comuzzi and Barbara Pernici",
title =        "A framework for {QoS}-based {Web} service
contracting",
journal =      j-TWEB,
volume =       "3",
number =       "3",
pages =        "10:1--10:??",
month =        jun,
year =         "2009",
CODEN =        "????",
DOI =          "http://doi.acm.org/10.1145/1541822.1541825",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The extensive adoption of Web service-based
applications in dynamic business scenarios, such as
on-demand computing or highly reconfigurable virtual
enterprises, advocates for methods and tools for the
management of Web service nonfunctional aspects, such
as Quality of Service (QoS). Concerning contracts on
Web service QoS, the literature has mostly focused on
the contract definition and on mechanisms for contract
enactment, such as the monitoring of the satisfaction
of negotiated QoS guarantees. In this context, this
article proposes a framework for the automation of the
Web service contract specification and establishment.
An extensible model for defining both domain-dependent
and domain-independent Web service QoS dimensions and a
method for the automation of the contract establishment
phase are proposed.
We describe a matchmaking algorithm for the ranking of functionally equivalent services, which orders services on the basis of their ability to fulfill the service requestor requirements, while maintaining the price below a specified budget. We also provide an algorithm for the configuration of the negotiable part of the QoS Service-Level Agreement (SLA), which is used to configure the agreement with the top-ranked service identified in the matchmaking phase. Experimental results show that, in a utility theory perspective, the contract establishment phase leads to efficient outcomes. We envision two advanced application scenarios for the Web service contracting framework proposed in this article. First, it can be used to enhance Web services self-healing properties in reaction to QoS-related service failures; second, it can be exploited in process optimization for the online reconfiguration of candidate Web services QoS SLAs.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "matchmaking; negotiation; QoS; service selection; SLA; Web service", } @Article{Pilioura:2009:UPD, author = "Thomi Pilioura and Aphrodite Tsalgatidou", title = "Unified publication and discovery of semantic {Web} services", journal = j-TWEB, volume = "3", number = "3", pages = "11:1--11:??", month = jun, year = "2009", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1541822.1541826", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The challenge of publishing and discovering Web services has recently received lots of attention. 
Various solutions to this problem have been proposed which, apart from their offered advantages, suffer the following disadvantages: (i) most of them are syntactic-based, leading to poor precision and recall, (ii) they are not scalable to large numbers of services, and (iii) they are incompatible, thus yielding in cumbersome service publication and discovery. This article presents the principles, the functionality, and the design of PYRAMID-S which addresses these disadvantages by providing a scalable framework for unified publication and discovery of semantically enhanced services over heterogeneous registries. PYRAMID-S uses a hybrid peer-to-peer topology to organize Web service registries based on domains. In such a topology, each Registry retains its autonomy, meaning that it can use the publication and discovery mechanisms as well as the ontology of its choice. The viability of this approach is demonstrated through the implementation and experimental analysis of a prototype.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "evaluation; PYRAMID-S; scalable; semantic Web services; unified; Web service discovery; Web service publication", } @Article{Golbeck:2009:TNP, author = "Jennifer Golbeck", title = "Trust and nuanced profile similarity in online social networks", journal = j-TWEB, volume = "3", number = "4", pages = "12:1--12:??", month = sep, year = "2009", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1594173.1594174", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singh:2009:SSO, author = "Aameek Singh and Mudhakar Srivatsa and Ling 
Liu", title = "Search-as-a-service: {Outsourced} search over outsourced storage", journal = j-TWEB, volume = "3", number = "4", pages = "13:1--13:??", month = sep, year = "2009", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1594173.1594175", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Robu:2009:ECS, author = "Valentin Robu and Harry Halpin and Hana Shepherd", title = "Emergence of consensus and shared vocabularies in collaborative tagging systems", journal = j-TWEB, volume = "3", number = "4", pages = "14:1--14:??", month = sep, year = "2009", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1594173.1594176", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:43 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zheng:2010:UTM, author = "Yu Zheng and Yukun Chen and Quannan Li and Xing Xie and Wei-Ying Ma", title = "Understanding transportation modes based on {GPS} data for {Web} applications", journal = j-TWEB, volume = "4", number = "1", pages = "1:1--1:??", month = jan, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1658373.1658374", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Li:2010:DSO, author = "Guoli 
Li and Vinod Muthusamy and Hans-Arno Jacobsen", title = "A distributed service-oriented architecture for business process execution", journal = j-TWEB, volume = "4", number = "1", pages = "2:1--2:??", month = jan, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1658373.1658375", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Montali:2010:DSV, author = "Marco Montali and Maja Pesic and Wil M. P. van der Aalst and Federico Chesani and Paola Mello and Sergio Storari", title = "Declarative specification and verification of service choreographies", journal = j-TWEB, volume = "4", number = "1", pages = "3:1--3:??", month = jan, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1658373.1658376", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:45 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Almishari:2010:APD, author = "Mishari Almishari and Xiaowei Yang", title = "Ads-portal domains: {Identification} and measurements", journal = j-TWEB, volume = "4", number = "2", pages = "4:1--4:??", month = apr, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1734200.1734201", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "An ads-portal domain refers to a Web domain that shows only advertisements, served by a third-party advertisement syndication service, 
in the form of ads listing. We develop a machine-learning-based classifier to identify ads-portal domains, which has 96\% accuracy. We use this classifier to measure the prevalence of ads-portal domains on the Internet. Surprisingly, 28.3/25\% of the (two-level) {\tt *.com} /{\tt *.net} web domains are ads-portal domains. Also, 41/39.8\% of {\tt *.com} /{\tt *.net} ads-portal domains are typos of well-known domains, also known as typo-squatting domains. In addition, we use the classifier along with DNS trace files to estimate how often Internet users visit ads-portal domains. It turns out that $ \approx 5 \% $ of the two-level {\tt *.com}, {\tt *.net}, {\tt *.org}, {\tt *.biz} and {\tt *.info} web domains on the traces are ads-portal domains and $
\approx 50 \% $ of these accessed ads-portal domains are typos. These numbers show that ads-portal domains and typo-squatting ads-portal domains are prevalent on the Internet and successful in attracting many visits. Our classifier represents a step towards better categorizing the web documents. It can also be helpful to search engines ranking algorithms, helpful in identifying web spams that redirects to ads-portal domains, and used to discourage access to typo-squatting ads-portal domains.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Ads-portal; advertisement syndication; data mining; parked domain; parking service; Web characterization", } @Article{Jurca:2010:RIB, author = "Radu Jurca and Florent Garcin and Arjun Talwar and Boi Faltings", title = "Reporting incentives and biases in online review forums", journal = j-TWEB, volume = "4", number = "2", pages = "5:1--5:??", month = apr, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1734200.1734202", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online reviews have become increasingly popular as a way to judge the quality of various products and services. However, recent work demonstrates that the absence of reporting incentives leads to a biased set of reviews that may not reflect the true quality. In this paper, we investigate underlying factors that influence users when reporting feedback. In particular, we study both reporting incentives and reporting biases observed in a widely used review forum, the Tripadvisor Web site. 
We consider three sources of information: first, the numerical ratings left by the user for different aspects of quality; second, the textual comment accompanying a review; third, the patterns in the time sequence of reports. We first show that groups of users who discuss a certain feature at length are more likely to agree in their ratings. Second, we show that users are more motivated to give feedback when they perceive a greater risk involved in a transaction. Third, a user's rating partly reflects the difference between true quality and prior expectation of quality, as inferred from previous reviews. We finally observe that because of these biases, when averaging review scores there are strong differences between the mean and the median. We speculate that the median may be a better way to summarize the ratings.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Online reviews; reputation mechanisms", } @Article{Vlachos:2010:ODB, author = "Michail Vlachos and Suleyman S. Kozat and Philip S. Yu", title = "Optimal distance bounds for fast search on compressed time-series query logs", journal = j-TWEB, volume = "4", number = "2", pages = "6:1--6:??", month = apr, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1734200.1734203", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Consider a database of time-series, where each datapoint in the series records the total number of users who asked for a specific query at an Internet search engine. Storage and analysis of such logs can be very beneficial for a search company from multiple perspectives. 
First, from a data organization perspective, because query Weblogs capture important trends and statistics, they can help enhance and optimize the search experience (keyword recommendation, discovery of news events). Second, Weblog data can provide an important polling mechanism for the microeconomic aspects of a search engine, since they can facilitate and promote the advertising facet of the search engine (understand what users request and when they request it).\par Due to the sheer amount of time-series Weblogs, manipulation of the logs in a compressed form is an impeding necessity for fast data processing and compact storage requirements. Here, we explicate how to compute the lower and upper distance bounds on the time-series logs when working directly on their compressed form. Optimal distance estimation means tighter bounds, leading to better candidate selection/elimination and ultimately faster search performance. Our derivation of the optimal distance bounds is based on the careful analysis of the problem using optimization principles. The experimental evaluation suggests a clear performance advantage of the proposed method, compared to previous compression/search techniques. 
The presented method results in a 10--30\% improvement on distance estimations, which in turn leads to 25--80\% improvement on the search performance.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fraternali:2010:ERI, author = "Piero Fraternali and Sara Comai and Alessandro Bozzon and Giovanni Toffetti Carughi", title = "Engineering rich {Internet} applications with a model-driven approach", journal = j-TWEB, volume = "4", number = "2", pages = "7:1--7:??", month = apr, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1734200.1734204", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:32 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Rich Internet Applications (RIAs) have introduced powerful novel functionalities into the Web architecture, borrowed from client-server and desktop applications. The resulting platforms allow designers to improve the user's experience, by exploiting client-side data and computation, bidirectional client-server communication, synchronous and asynchronous events, and rich interface widgets. However, the rapid evolution of RIA technologies challenges the Model-Driven Development methodologies that have been successfully applied in the past decade to traditional Web solutions. This paper illustrates an evolutionary approach for incorporating a wealth of RIA features into an existing Web engineering methodology and notation. The experience demonstrates that it is possible to model RIA application requirements at a high-level using a platform-independent notation, and generate the client-side and server-side code automatically. 
The resulting approach is evaluated in terms of expressive power, ease of use, and implementability.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "information interfaces and presentation; information storage and retrieval; model-driven development; Rich Internet applications; Web engineering", } @Article{Xiao:2010:LSS, author = "Xiangye Xiao and Qiong Luo and Zhisheng Li and Xing Xie and Wei-Ying Ma", title = "A large-scale study on map search logs", journal = j-TWEB, volume = "4", number = "3", pages = "8:1--8:??", month = jul, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1806916.1806917", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Map search engines, such as Google Maps, Yahoo! Maps, and Microsoft Live Maps, allow users to explicitly specify a target geographic location, either in keywords or on the map, and to search businesses, people, and other information of that location. In this article, we report a first study on a million-entry map search log. We identify three key attributes of a map search record --- the keyword query, the target location and the user location, and examine the characteristics of these three dimensions separately as well as the associations between them. 
Comparing our results with those previously reported on logs of general search engines and mobile search engines, including those for geographic queries, we discover the following unique features of map search: (1) People use longer queries and modify queries more frequently in a session than in general search and mobile search; People view fewer result pages per query than in general search; (2) The popular query topics in map search are different from those in general search and mobile search; (3) The target locations in a session change within 50 kilometers for almost 80\% of the sessions; (4) Queries, search target locations and user locations (both at the city level) all follow the power law distribution; (5) One third of queries are issued for target locations within 50 kilometers from the user locations; (6) The distribution of a query over target locations appears to follow the geographic location of the queried entity.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "local search; log analysis; Map search; query categorization; search interface; user behavior", } @Article{Malak:2010:MWQ, author = "Ghazwa Malak and Houari Sahraoui and Linda Badri and Mourad Badri", title = "Modeling {Web} quality using a probabilistic approach: {An} empirical validation", journal = j-TWEB, volume = "4", number = "3", pages = "9:1--9:??", month = jul, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1806916.1806918", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web-based applications are software systems that continuously evolve to meet users' needs and to adapt to new technologies. Assuring their quality is then a difficult, but essential task. 
In fact, a large number of factors can affect their quality. Considering these factors and their interaction involves managing uncertainty and subjectivity inherent to this kind of applications. In this article, we present a probabilistic approach for building Web quality models and the associated assessment method. The proposed approach is based on Bayesian Networks. A model is built following a four-step process consisting in collecting quality characteristics, refining them, building a model structure, and deriving the model parameters.\par The feasibility of the approach is illustrated on the important quality characteristic of {\em Navigability design}. To validate the produced model, we conducted an experimental study with 20 subjects and 40 web pages. The results obtained show that the scores given by the used model are strongly correlated with navigability as perceived and experienced by the users.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Bayesian networks; Navigability design; probabilistic approach; quality evaluation; Web applications", } @Article{Poblete:2010:PPQ, author = "Barbara Poblete and Myra Spiliopoulou and Ricardo Baeza-Yates", title = "Privacy-preserving query log mining for business confidentiality protection", journal = j-TWEB, volume = "4", number = "3", pages = "10:1--10:??", month = jul, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1806916.1806919", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We introduce the concern of confidentiality protection of business information for the publication of search engine query logs and derived data. 
We study business confidentiality, as the protection of nonpublic data from institutions, such as companies and people in the public eye. In particular, we relate this concern to the involuntary exposure of confidential Web site information, and we transfer this problem into the field of privacy-preserving data mining. We characterize the possible adversaries interested in disclosing Web site confidential data and the attack strategies that they could use. These attacks are based on different vulnerabilities found in query log for which we present several anonymization heuristics to prevent them. We perform an experimental evaluation to estimate the remaining utility of the log after the application of our anonymization techniques. Our experimental results show that a query log can be anonymized against these specific attacks while retaining a significant volume of useful data.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Privacy preservation; queries; query log publication; Web sites", } @Article{Consens:2010:EXW, author = "Mariano P. Consens and Ren{\'e}e J. Miller and Flavio Rizzolo and Alejandro A. Vaisman", title = "Exploring {XML} {Web} collections with {DescribeX}", journal = j-TWEB, volume = "4", number = "3", pages = "11:1--11:??", month = jul, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1806916.1806920", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As Web applications mature and evolve, the nature of the semistructured data that drives these applications also changes. An important trend is the need for increased flexibility in the structure of Web documents. 
Hence, applications cannot rely solely on schemas to provide the complex knowledge needed to visualize, use, query and manage documents. Even when XML Web documents are valid with regard to a schema, the actual structure of such documents may exhibit significant variations across collections for several reasons: the schema may be very lax (e.g., RSS feeds), the schema may be large and different subsets of it may be used in different documents (e.g., industry standards like UBL), or open content models may allow arbitrary schemas to be mixed (e.g., RSS extensions like those used for podcasting). For these reasons, many applications that incorporate XPath queries to process a large Web document collection require an understanding of the actual structure present in the collection, and not just the schema.\par To support modern Web applications, we introduce DescribeX, a powerful framework that is capable of describing complex XML summaries of Web collections. DescribeX supports the construction of heterogeneous summaries that can be declaratively defined and refined by means of axis path regular expression (AxPREs). AxPREs provide the flexibility necessary for declaratively defining complex mappings between instance nodes (in the documents) and summary nodes. These mappings are capable of expressing order and cardinality, among other properties, which can significantly help in the understanding of the structure of large collections of XML documents and enhance the performance of Web applications over these collections. DescribeX captures most summary proposals in the literature by providing (for the first time) a common declarative definition for them. 
Experimental results demonstrate the scalability of DescribeX summary operations (summary creation, as well as refinement and stabilization, two key enablers for tailoring summaries) on multi-gigabyte Web collections.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Semistructured data; structural summaries; XML; XPath", } @Article{Adams:2010:DLS, author = "Brett Adams and Dinh Phung and Svetha Venkatesh", title = "Discovery of latent subcommunities in a blog's readership", journal = j-TWEB, volume = "4", number = "3", pages = "12:1--12:??", month = jul, year = "2010", CODEN = "????", DOI = "http://doi.acm.org/10.1145/1806916.1806921", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Sat Aug 14 15:42:40 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The blogosphere has grown to be a mainstream forum of social interaction as well as a commercially attractive source of information and influence. Tools are needed to better understand how communities that adhere to individual blogs are constituted in order to facilitate new personal, socially-focused browsing paradigms, and understand how blog content is consumed, which is of interest to blog authors, big media, and search. We present a novel approach to blog subcommunity characterization by modeling individual blog readers using mixtures of an extension to the LDA family that jointly models phrases and time, Ngram Topic over Time (NTOT), and cluster with a number of similarity measures using Affinity Propagation. We experiment with two datasets: a small set of blogs whose authors provide feedback, and a set of popular, highly commented blogs, which provide indicators of algorithm scalability and interpretability without prior knowledge of a given blog. 
The results offer useful insight to the blog authors about their commenting community, and are observed to offer an integrated perspective on the topics of discussion and members engaged in those discussions for unfamiliar blogs. Our approach also holds promise as a component of solutions to related problems, such as online entity resolution and role discovery.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "affinity propagation; Blog; topic models; Web communities", } @Article{Kiciman:2010:APR, author = "Emre Kiciman and Benjamin Livshits", title = "{AjaxScope}: {A} Platform for Remotely Monitoring the Client-Side Behavior of {Web 2.0} Applications", journal = j-TWEB, volume = "4", number = "4", pages = "13:1--13:??", month = sep, year = "2010", CODEN = "????", DOI = "http://dx.doi.org/10.1145/1841909.1841910", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bex:2010:LDR, author = "Geert Jan Bex and Wouter Gelade and Frank Neven and Stijn Vansummeren", title = "Learning Deterministic Regular Expressions for the Inference of Schemas from {XML} Data", journal = j-TWEB, volume = "4", number = "4", pages = "14:1--14:??", month = sep, year = "2010", CODEN = "????", DOI = "http://dx.doi.org/10.1145/1841909.1841911", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Nov 23 12:48:27 MST 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } 
@Article{Bailey:2010:MHQ,
  author =       "Peter Bailey and Ryen W. White and Han Liu and
                 Giridhar Kumaran",
  title =        "Mining Historic Query Trails to Label Long and Rare
                 Search Engine Queries",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841912",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Claude:2010:FCW,
  author =       "Francisco Claude and Gonzalo Navarro",
  title =        "Fast and Compact {Web} Graph Representations",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841913",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Swaminathan:2010:RRM,
  author =       "Ashwin Swaminathan and Renan G. Cattelan and Ydo
                 Wexler and Cherian V. Mathew and Darko Kirovski",
  title =        "Relating Reputation and Money in Online Markets",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841914",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Riedl:2011:ISI,
  author =       "John Riedl and Barry Smyth",
  title =        "Introduction to special issue on recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921592",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cacheda:2011:CCF,
  author =       "Fidel Cacheda and V{\'\i}ctor Carneiro and Diego
                 Fern{\'a}ndez and Vreixo Formoso",
  title =        "Comparison of collaborative filtering algorithms:
                 Limitations of current techniques and proposals for
                 scalable, high-performance recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921593",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
@Article{Umyarov:2011:UEA,
  author =       "Akhmed Umyarov and Alexander Tuzhilin",
  title =        "Using external aggregate ratings for improving individual recommendations",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921594",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Song:2011:ATR,
  author =       "Yang Song and Lu Zhang and C. Lee Giles",
  title =        "Automatic tag recommendation algorithms for social recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921595",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zheng:2011:RFL,
  author =       "Yu Zheng and Lizhu Zhang and Zhengxin Ma and Xing Xie and Wei-Ying Ma",
  title =        "Recommending friends and locations based on individual location history",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921596",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Wu:2011:TDQ,
  author =       "Mingfang Wu and Falk Scholer and Andrew Turpin",
  title =        "Topic Distillation with Query-Dependent Link Connections and Page Characteristics",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961660",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Hurley:2011:HBP,
  author =       "John Hurley and Emi Garcia-Palacios and Sakir Sezer",
  title =        "Host-Based {P2P} Flow Identification and Use in Real-Time",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961661",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Mitra:2011:CWB,
  author =       "Siddharth Mitra and Mayank Agrawal and Amit Yadav and Niklas Carlsson and Derek Eager and Anirban Mahanti",
  title =        "Characterizing {Web}-Based Video Sharing Workloads",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961662",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ozcan:2011:CAS,
  author =       "Rifat Ozcan and Ismail Sengor Altingovde and {\"O}zg{\"u}r Ulusoy",
  title =        "Cost-Aware Strategies for Query Result Caching in {Web} Search Engines",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961663",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Valderas:2011:SRS,
  author =       "Pedro Valderas and Vicente Pelechano",
  title =        "A Survey of Requirements Specification in Model-Driven Development of {Web} Applications",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961664",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Grier:2011:DIO,
  author =       "Chris Grier and Shuo Tang and Samuel T. King",
  title =        "Designing and Implementing the {OP} and {OP2} {Web} Browsers",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961665",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Koutsonikola:2011:CDL,
  author =       "Vassiliki Koutsonikola and Athena Vakali",
  title =        "A Clustering-Driven {LDAP} Framework",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993054",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Paci:2011:AAC,
  author =       "Federica Paci and Massimo Mecella and Mourad Ouzzani and Elisa Bertino",
  title =        "{ACConv} -- An Access Control Model for Conversational {Web} Services",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993055",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zeginis:2011:CDR,
  author =       "Dimitris Zeginis and Yannis Tzitzikas and Vassilis Christophides",
  title =        "On Computing Deltas of {RDF/S} Knowledge Bases",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993056",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Baykan:2011:CSF,
  author =       "Eda Baykan and Monika Henzinger and Ludmila Marian and Ingmar Weber",
  title =        "A Comprehensive Study of Features and Algorithms for {URL}-Based Topic Classification",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993057",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Tuchinda:2011:BMD,
  author =       "Rattapoom Tuchinda and Craig A. Knoblock and Pedro Szekely",
  title =        "Building Mashups by Demonstration",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993058",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Alzoubi:2011:PAA,
  author =       "Hussein A.
Alzoubi and Seungjoon Lee and Michael Rabinovich and Oliver Spatscheck and Jacobus {Van Der Merwe}", title = "A Practical Architecture for an {Anycast CDN}", journal = j-TWEB, volume = "5", number = "4", pages = "17:1--17:??", month = oct, year = "2011", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2019643.2019644", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "IP Anycast has many attractive features for any service that involve the replication of multiple instances across the Internet. IP Anycast allows multiple instances of the same service to be `naturally' discovered, and requests for this service to be delivered to the closest instance. However, while briefly considered as an enabler for content delivery networks (CDNs) when they first emerged, IP Anycast was deemed infeasible in that environment. The main reasons for this decision were the lack of load awareness of IP Anycast and unwanted side effects of Internet routing changes on the IP Anycast mechanism. In this article we re-evaluate IP Anycast for CDNs by proposing a load-aware IP Anycast CDN architecture. Our architecture is prompted by recent developments in route control technology, as well as better understanding of the behavior of IP Anycast in operational settings. Our architecture makes use of route control mechanisms to take server and network load into account to realize load-aware Anycast. We show that the resulting redirection requirements can be formulated as a Generalized Assignment Problem and present practical algorithms that address these requirements while at the same time limiting connection disruptions that plague regular IP Anycast. 
We evaluate our algorithms through trace based simulation using traces obtained from a production CDN network.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bar-Yossef:2011:ESE, author = "Ziv Bar-Yossef and Maxim Gurevich", title = "Efficient Search Engine Measurements", journal = j-TWEB, volume = "5", number = "4", pages = "18:1--18:??", month = oct, year = "2011", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2019643.2019645", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We address the problem of externally measuring aggregate functions over documents indexed by search engines, like corpus size, index freshness, and density of duplicates in the corpus. State of the art estimators for such quantities [Bar-Yossef and Gurevich 2008b; Broder et al. 2006] are biased due to inaccurate approximation of the so called `document degrees'. In addition, the estimators in Bar-Yossef and Gurevich [2008b] are quite costly, due to their reliance on rejection sampling. We present new estimators that are able to overcome the bias introduced by approximate degrees. Our estimators are based on a careful implementation of an approximate importance sampling procedure. Comprehensive theoretical and empirical analysis of the estimators demonstrates that they have essentially no bias even in situations where document degrees are poorly approximated. By avoiding the costly rejection sampling approach, our new importance sampling estimators are significantly more efficient than the estimators proposed in Bar-Yossef and Gurevich [2008b]. Furthermore, building on an idea from Broder et al. [2006], we discuss Rao-Blackwellization as a generic method for reducing variance in search engine estimators. 
We show that Rao-Blackwellizing our estimators results in performance improvements, without compromising accuracy.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gill:2011:COU, author = "Phillipa Gill and Martin Arlitt and Niklas Carlsson and Anirban Mahanti and Carey Williamson", title = "Characterizing Organizational Use of {Web}-Based Services: Methodology, Challenges, Observations, and Insights", journal = j-TWEB, volume = "5", number = "4", pages = "19:1--19:??", month = oct, year = "2011", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2019643.2019646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Today's Web provides many different functionalities, including communication, entertainment, social networking, and information retrieval. In this article, we analyze traces of HTTP activity from a large enterprise and from a large university to identify and characterize Web-based service usage. Our work provides an initial methodology for the analysis of Web-based services. While it is nontrivial to identify the classes, instances, and providers for each transaction, our results show that most of the traffic comes from a small subset of providers, which can be classified manually. 
Furthermore, we assess both qualitatively and quantitatively how the Web has evolved over the past decade, and discuss the implications of these changes.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singla:2011:CBC, author = "Adish Singla and Ingmar Weber", title = "Camera Brand Congruence and Camera Model Propagation in the {Flickr} Social Graph", journal = j-TWEB, volume = "5", number = "4", pages = "20:1--20:??", month = oct, year = "2011", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2019643.2019647", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given that my friends on Flickr use cameras of brand X, am I more likely to also use a camera of brand X? Given that one of these friends changes her brand, am I likely to do the same? Do new camera models pop up uniformly in the friendship graph? Or do early adopters then `convert' their friends? Which factors influence the conversion probability of a user? These are the kind of questions addressed in this work. Direct applications involve personalized advertising in social networks. For our study, we crawled a complete connected component of the Flickr friendship graph with a total of 67M edges and 3.9M users. 1.2M of these users had at least one public photograph with valid model metadata, which allowed us to assign camera brands and models to users and time slots. Similarly, we used, where provided in a user's profile, information about a user's geographic location and the groups joined on Flickr. Concerning brand congruence, our main findings are the following. First, a pair of friends on Flickr has a higher probability of being congruent, that is, using the same brand, compared to two random users (27\% vs. 19\%). 
Second, the degree of congruence goes up for pairs of friends (i) in the same country (29\%), (ii) who both only have very few friends (30\%), and (iii) with a very high cliqueness (38\%). Third, given that a user changes her camera model between March-May 2007 and March-May 2008, high cliqueness friends are more likely than random users to do the same (54\% vs. 48\%). Fourth, users using high-end cameras are far more loyal to their brand than users using point-and-shoot cameras, with a probability of staying with the same brand of 60\% vs 33\%, given that a new camera is bought. Fifth, these `expert' users' brand congruence reaches 66\% for high cliqueness friends. All these differences are statistically significant at 1\%. As for the propagation of new models in the friendship graph, we observe the following. First, the growth of connected components of users converted to a particular, new camera model differs distinctly from random growth. Second, the decline of dissemination of a particular model is close to random decline. This illustrates that users influence their friends to change to a particular new model, rather than from a particular old model. Third, having many converted friends increases the probability of the user to convert herself. Here differences between friends from the same or from different countries are more pronounced for point-and-shoot than for digital single-lens reflex users. Fourth, there was again a distinct difference between arbitrary friends and high cliqueness friends in terms of prediction quality for conversion.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Curlango-Rosas:2011:SSA, author = "Cecilia Curlango-Rosas and Gregorio A. Ponce and Gabriel A. 
Lopez-Morteo", title = "A Specialized Search Assistant for Learning Objects", journal = j-TWEB, volume = "5", number = "4", pages = "21:1--21:??", month = oct, year = "2011", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2019643.2019648", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web holds a great quantity of material that can be used to enhance classroom instruction. However, it is not easy to retrieve this material with the search engines currently available. This study produced a specialized search assistant based on Google that significantly increases the number of instances in which teachers find the desired learning objects as compared to using this popular public search engine directly. Success in finding learning objects by study participants went from 80\% using Google alone to 96\% when using our search assistant in one scenario and, in another scenario, from a 40\% success rate with Google alone to 66\% with our assistant. This specialized search assistant implements features such as bilingual search and term suggestion which were requested by teacher participants to help improve their searches. 
Study participants evaluated the specialized search assistant and found it significantly easier to use and more useful than the popular search engine for the purpose of finding learning objects.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhu:2012:CLS, author = "Guangyu Zhu and Gilad Mishne", title = "{ClickRank}: Learning Session-Context Models to Enrich {Web} Search Ranking", journal = j-TWEB, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2109205.2109206", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User browsing information, particularly non-search-related activity, reveals important contextual information on the preferences and intents of Web users. In this article, we demonstrate the importance of mining general Web user behavior data to improve ranking and other Web-search experience, with an emphasis on analyzing individual user sessions for creating aggregate models. In this context, we introduce ClickRank, an efficient, scalable algorithm for estimating Webpage and Website importance from general Web user-behavior data. We lay out the theoretical foundation of ClickRank based on an intentional surfer model and discuss its properties. We quantitatively evaluate its effectiveness regarding the problem of Web-search ranking, showing that it contributes significantly to retrieval performance as a novel Web-search feature. We demonstrate that the results produced by ClickRank for Web-search ranking are highly competitive with those produced by other approaches, yet achieved at better scalability and substantially lower computational costs. 
Finally, we discuss novel applications of ClickRank in providing enriched user Web-search experience, highlighting the usefulness of our approach for nonranking tasks.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2012:IWS, author = "Yiqun Liu and Fei Chen and Weize Kong and Huijia Yu and Min Zhang and Shaoping Ma and Liyun Ru", title = "Identifying {Web} Spam with the Wisdom of the Crowds", journal = j-TWEB, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2109205.2109207", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. 
Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mesbah:2012:CAB, author = "Ali Mesbah and Arie van Deursen and Stefan Lenselink", title = "Crawling {Ajax}-Based {Web} Applications through Dynamic Analysis of User Interface State Changes", journal = j-TWEB, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2109205.2109208", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Using JavaScript and dynamic DOM manipulation on the client side of Web applications is becoming a widespread approach for achieving rich interactivity and responsiveness in modern Web applications. At the same time, such techniques---collectively known as Ajax---shatter the concept of webpages with unique URLs, on which traditional Web crawlers are based. This article describes a novel technique for crawling Ajax-based applications through automatic dynamic analysis of user-interface-state changes in Web browsers. Our algorithm scans the DOM tree, spots candidate elements that are capable of changing the state, fires events on those candidate elements, and incrementally infers a state machine that models the various navigational paths and states within an Ajax application. This inferred model can be used in program comprehension and in analysis and testing of dynamic Web states, for instance, or for generating a static version of the application. In this article, we discuss our sequential and concurrent Ajax crawling algorithms. 
We present our open source tool called Crawljax, which implements the concepts and algorithms discussed in this article. Additionally, we report a number of empirical studies in which we apply our approach to a number of open-source and industrial Web applications and elaborate on the obtained results.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lauw:2012:QLO, author = "Hady W. Lauw and Ee-Peng Lim and Ke Wang", title = "Quality and Leniency in Online Collaborative Rating Systems", journal = j-TWEB, volume = "6", number = "1", pages = "4:1--4:??", month = mar, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2109205.2109209", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The emerging trend of social information processing has resulted in Web users' increased reliance on user-generated content contributed by others for information searching and decision making. Rating scores, a form of user-generated content contributed by reviewers in online rating systems, allow users to leverage others' opinions in the evaluation of objects. In this article, we focus on the problem of summarizing the rating scores given to an object into an overall score that reflects the object's quality. We observe that the existing approaches for summarizing scores largely ignores the effect of reviewers exercising different standards in assigning scores. Instead of treating all reviewers as equals, our approach models the leniency of reviewers, which refers to the tendency of a reviewer to assign higher scores than other coreviewers. 
Our approach is underlined by two insights: (1) The leniency of a reviewer depends not only on how the reviewer rates objects, but also on how other reviewers rate those objects and (2) The leniency of a reviewer and the quality of rated objects are mutually dependent. We develop the leniency-aware quality, or LQ model, which solves leniency and quality simultaneously. We introduce both an exact and a ranked solution to the model. Experiments on real-life and synthetic datasets show that LQ is more effective than comparable approaches. LQ is also shown to perform consistently better under different parameter settings.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ashman:2012:E, author = "Helen Ashman and Arun Iyengar and Marc Najork", title = "Editorial", journal = j-TWEB, volume = "6", number = "2", pages = "5:1--5:??", month = may, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2180861.2180862", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{DeCapitaniDiVimercati:2012:ITM, author = "Sabrina {De Capitani Di Vimercati} and Sara Foresti and Sushil Jajodia and Stefano Paraboschi and Giuseppe Psaila and Pierangela Samarati", title = "Integrating trust management and access control in data-intensive {Web} applications", journal = j-TWEB, volume = "6", number = "2", pages = "6:1--6:??", month = may, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2180861.2180863", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = 
"http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The widespread diffusion of Web-based services provided by public and private organizations emphasizes the need for a flexible solution for protecting the information accessible through Web applications. A promising approach is represented by credential-based access control and trust management. However, although much research has been done and several proposals exist, a clear obstacle to the realization of their benefits in data-intensive Web applications is represented by the lack of adequate support in the DBMSs. As a matter of fact, DBMSs are often responsible for the management of most of the information that is accessed using a Web browser or a Web service invocation. In this article, we aim at eliminating this gap, and present an approach integrating trust management with the access control of the DBMS. We propose a trust model with a SQL syntax and illustrate an algorithm for the efficient verification of a delegation path for certificates. Our solution nicely complements current trust management proposals allowing the efficient realization of the services of an advanced trust management model within current relational DBMSs. An important benefit of our approach lies in its potential for a robust end-to-end design of security for personal data in Web scenario, where vulnerabilities of Web applications cannot be used to violate the protection of the data residing on the database server. 
We also illustrate the implementation of our approach within an open-source DBMS discussing design choices and performance impact.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alrifai:2012:HAE, author = "Mohammad Alrifai and Thomas Risse and Wolfgang Nejdl", title = "A hybrid approach for efficient {Web} service composition with end-to-end {QoS} constraints", journal = j-TWEB, volume = "6", number = "2", pages = "7:1--7:??", month = may, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2180861.2180864", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Dynamic selection of Web services at runtime is important for building flexible and loosely-coupled service-oriented applications. An abstract description of the required services is provided at design-time, and matching service offers are located at runtime. With the growing number of Web services that provide the same functionality but differ in quality parameters (e.g., availability, response time), a decision needs to be made on which services should be selected such that the user's end-to-end QoS requirements are satisfied. Although very efficient, local selection strategy fails short in handling global QoS requirements. Solutions based on global optimization, on the other hand, can handle global constraints, but their poor performance renders them inappropriate for applications with dynamic and realtime requirements. In this article we address this problem and propose a hybrid solution that combines global optimization with local selection techniques to benefit from the advantages of both worlds. 
The proposed solution consists of two steps: first, we use mixed integer programming (MIP) to find the optimal decomposition of global QoS constraints into local constraints. Second, we use distributed local selection to find the best Web services that satisfy these local constraints. The results of experimental evaluation indicate that our approach significantly outperforms existing solutions in terms of computation time while achieving close-to-optimal results.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Desnoyers:2012:MAM, author = "Peter Desnoyers and Timothy Wood and Prashant Shenoy and Rahul Singh and Sangameshwar Patil and Harrick Vin", title = "{Modellus}: Automated modeling of complex {Internet} data center applications", journal = j-TWEB, volume = "6", number = "2", pages = "8:1--8:??", month = may, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2180861.2180865", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The rising complexity of distributed server applications in Internet data centers has made the tasks of modeling and analyzing their behavior increasingly difficult. This article presents Modellus, a novel system for automated modeling of complex web-based data center applications using methods from queuing theory, data mining, and machine learning. Modellus uses queuing theory and statistical methods to automatically derive models to predict the resource usage of an application and the workload it triggers; these models can be composed to capture multiple dependencies between interacting applications. 
Model accuracy is maintained by fast, distributed testing, automated relearning of models when they change, and methods to bound prediction errors in composite models. We have implemented a prototype of Modellus, deployed it on a data center testbed, and evaluated its efficacy for modeling and analysis of several distributed multitier web applications. Our results show that this feature-based modeling technique is able to make predictions across several data center tiers, and maintain predictive accuracy (typically 95\% or better) in the face of significant shifts in workload composition; we also demonstrate practical applications of the Modellus system to prediction and provisioning of real-world data center applications.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Aiello:2012:FPH, author = "Luca Maria Aiello and Alain Barrat and Rossano Schifanella and Ciro Cattuto and Benjamin Markines and Filippo Menczer", title = "Friendship prediction and homophily in social media", journal = j-TWEB, volume = "6", number = "2", pages = "9:1--9:??", month = may, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2180861.2180866", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social media have attracted considerable attention because their open-ended nature allows users to create lightweight semantic scaffolding to organize and share content. To date, the interplay of the social and topical components of social media has been only partially explored. Here, we study the presence of homophily in three systems that combine tagging social media with online social networks. 
We find a substantial level of topical similarity among users who are close to each other in the social network. We introduce a null model that preserves user activity while removing local correlations, allowing us to disentangle the actual local similarity between users from statistical effects due to the assortative mixing of user activity and centrality in the social network. This analysis suggests that users with similar interests are more likely to be friends, and therefore topical similarity measures among users based solely on their annotation metadata should be predictive of social links. We test this hypothesis on several datasets, confirming that social networks constructed from topical similarity capture actual friendship accurately. When combined with topological features, topical similarity achieves a link prediction accuracy of about 92\%.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Comai:2012:MDM, author = "Sara Comai and Davide Mazza", title = "A model-driven methodology to the content layout problem in {Web} applications", journal = j-TWEB, volume = "6", number = "3", pages = "10:1--10:38", month = sep, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2344416.2344417", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/texbook3.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article presents a model-driven approach for the design of the layout in a complex Web application, where large amounts of data are accessed. The aim of this work is to reduce, as much as possible, repetitive tasks and to factor out common aspects into different kinds of rules that can be reused across different applications. 
In particular, exploiting the conceptual elements of the typical models used for the design of a Web application, it defines presentation and layout rules at different levels of abstraction and granularity. A procedure for the automatic layout of the content of a page is proposed and evaluated, and the layout of advanced Web applications is discussed.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Automatic contents layout; graphical visualization and rendering; Web applications design", } @Article{Merhav:2012:EIN, author = "Yuval Merhav and Filipe Mesquita and Denilson Barbosa and Wai Gen Yee and Ophir Frieder", title = "Extracting information networks from the blogosphere", journal = j-TWEB, volume = "6", number = "3", pages = "11:1--11:??", month = sep, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2344416.2344418", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We study the problem of automatically extracting information networks formed by recognizable entities as well as relations among them from social media sites. Our approach consists of using state-of-the-art natural language processing tools to identify entities and extract sentences that relate such entities, followed by using text-clustering algorithms to identify the relations within the information network. We propose a new term-weighting scheme that significantly improves on the state-of-the-art in the task of relation extraction, both when used in conjunction with the standard tf$ \cdot $idf scheme and also when used as a pruning filter. 
We describe an effective method for identifying benchmarks for open information extraction that relies on a curated online database that is comparable to the hand-crafted evaluation datasets in the literature. From this benchmark, we derive a much larger dataset which mimics realistic conditions for the task of open information extraction. We report on extensive experiments on both datasets, which not only shed light on the accuracy levels achieved by state-of-the-art open information extraction tools, but also on how to tune such tools for better results.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Miliaraki:2012:FDS, author = "Iris Miliaraki and Manolis Koubarakis", title = "{FoXtrot}: Distributed structural and value {XML} filtering", journal = j-TWEB, volume = "6", number = "3", pages = "12:1--12:??", month = sep, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2344416.2344419", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Publish/subscribe systems have emerged in recent years as a promising paradigm for offering various popular notification services. In this context, many XML filtering systems have been proposed to efficiently identify XML data that matches user interests expressed as queries in an XML query language like XPath. However, in order to offer XML filtering functionality on an Internet-scale, we need to deploy such a service in a distributed environment, avoiding bottlenecks that can deteriorate performance. In this work, we design and implement FoXtrot, a system for filtering XML data that combines the strengths of automata for efficient filtering and distributed hash tables for building a fully distributed system. 
Apart from structural-matching, performed using automata, we also discuss different methods for evaluating value-based predicates. We perform an extensive experimental evaluation of our system, FoXtrot, on a local cluster and on the PlanetLab network and demonstrate that it can index millions of user queries, achieving a high indexing and filtering throughput. At the same time, FoXtrot exhibits very good load-balancing properties and improves its performance as we increase the size of the network.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dork:2012:NTW, author = "Marian D{\"o}rk and Carey Williamson and Sheelagh Carpendale", title = "Navigating tomorrow's web: From searching and browsing to visual exploration", journal = j-TWEB, volume = "6", number = "3", pages = "13:1--13:??", month = sep, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2344416.2344420", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a new way of navigating the Web using interactive information visualizations, and present encouraging results from a large-scale Web study of a visual exploration system. While the Web has become an immense, diverse information space, it has also evolved into a powerful software platform. We believe that the established interaction techniques of searching and browsing do not sufficiently utilize these advances, since information seekers have to transform their information needs into specific, text-based search queries resulting in mostly text-based lists of resources. 
In contrast, we foresee a new type of information seeking that is high-level and more engaging, by providing the information seeker with interactive visualizations that give graphical overviews and enable query formulation. Building on recent work on faceted navigation, information visualization, and exploratory search, we conceptualize this type of information navigation as visual exploration and evaluate a prototype Web-based system that implements it. We discuss the results of a large-scale, mixed-method Web study that provides a better understanding of the potential benefits of visual exploration on the Web, and its particular performance challenges.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2012:CBQ, author = "B. Barla Cambazoglu and Ismail Sengor Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy", title = "Cache-Based Query Processing for Search Engines", journal = j-TWEB, volume = "6", number = "4", pages = "14:1--14:??", month = nov, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2382616.2382617", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In practice, a search engine may fail to serve a query due to various reasons such as hardware/network failures, excessive query load, lack of matching documents, or service contract limitations (e.g., the query rate limits for third-party users of a search service). In this kind of scenarios, where the backend search system is unable to generate answers to queries, approximate answers can be generated by exploiting the previously computed query results available in the result cache of the search engine. 
In this work, we propose two alternative strategies to implement this cache-based query processing idea. The first strategy aggregates the results of similar queries that are previously cached in order to create synthetic results for new queries. The second strategy forms an inverted index over the textual information (i.e., query terms and result snippets) present in the result cache and uses this index to answer new queries. Both approaches achieve reasonable result qualities compared to processing queries with an inverted index built on the collection.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Delac:2012:MSS, author = "Goran Delac and Ivan Budiselic and Ivan Zuzak and Ivan Skuliber and Tomislav Stefanec", title = "A Methodology for {SIP} and {SOAP} Integration Using Application-Specific Protocol Conversion", journal = j-TWEB, volume = "6", number = "4", pages = "15:1--15:??", month = nov, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2382616.2382618", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent years, the ubiquitous demands for cross-protocol application access are driving the need for deeper integration between SIP and SOAP. In this article we present a novel methodology for integrating these two protocols. Through an analysis of properties of SIP and SOAP we show that integration between these protocols should be based on application-specific converters. We describe a generic SIP/SOAP gateway that implements message handling and network and storage management while relying on application-specific converters to define session management and message mapping for a specific set of SIP and SOAP communication nodes. 
In order to ease development of these converters, we introduce an XML-based domain-specific language for describing application-specific conversion processes. We show how conversion processes can be easily specified in the language using message sequence diagrams of the desired interaction. We evaluate the presented methodology through performance analysis of the developed prototype gateway and high-level comparison with other solutions.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jeon:2012:WCP, author = "Myeongjae Jeon and Youngjae Kim and Jeaho Hwang and Joonwon Lee and Euiseong Seo", title = "Workload Characterization and Performance Implications of Large-Scale Blog Servers", journal = j-TWEB, volume = "6", number = "4", pages = "16:1--16:??", month = nov, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2382616.2382619", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the ever-increasing popularity of Social Network Services (SNSs), an understanding of the characteristics of these services and their effects on the behavior of their host servers is critical. However, there has been a lack of research on the workload characterization of servers running SNS applications such as blog services. To fill this void, we empirically characterized real-world Web server logs collected from one of the largest South Korean blog hosting sites for 12 consecutive days. The logs consist of more than 96 million HTTP requests and 4.7TB of network traffic. 
Our analysis reveals the following: (i) The transfer size of nonmultimedia files and blog articles can be modeled using a truncated Pareto distribution and a log-normal distribution, respectively; (ii) user access for blog articles does not show temporal locality, but is strongly biased towards those posted with image or audio files. We additionally discuss the potential performance improvement through clustering of small files on a blog page into contiguous disk blocks, which benefits from the observed file access patterns. Trace-driven simulations show that, on average, the suggested approach achieves 60.6\% better system throughput and reduces the processing time for file access by 30.8\% compared to the best performance of the Ext4 filesystem.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wilson:2012:BSG, author = "Christo Wilson and Alessandra Sala and Krishna P. N. Puttaswamy and Ben Y. Zhao", title = "Beyond Social Graphs: User Interactions in Online Social Networks and their Implications", journal = j-TWEB, volume = "6", number = "4", pages = "17:1--17:??", month = nov, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2382616.2382620", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social networks are popular platforms for interaction, communication, and collaboration between friends. Researchers have recently proposed an emerging class of applications that leverage relationships from social networks to improve security and performance in applications such as email, Web browsing, and overlay routing. 
While these applications often cite social network connectivity statistics to support their designs, researchers in psychology and sociology have repeatedly cast doubt on the practice of inferring meaningful relationships from social network connections alone. This leads to the question: ``Are social links valid indicators of real user interaction? If not, then how can we quantify these factors to form a more accurate model for evaluating socially enhanced applications?'' In this article, we address this question through a detailed study of user interactions in the Facebook social network. We propose the use of ``interaction graphs'' to impart meaning to online social links by quantifying user interactions. We analyze interaction graphs derived from Facebook user traces and show that they exhibit significantly lower levels of the ``small-world'' properties present in their social graph counterparts. This means that these graphs have fewer ``supernodes'' with extremely high degree, and overall graph diameter increases significantly as a result. To quantify the impact of our observations, we use both types of graphs to validate several well-known social-based applications that rely on graph properties to infuse new functionality into Internet applications, including Reliable Email (RE), SybilGuard, and the weighted cascade influence maximization algorithm. 
The results reveal new insights into each of these systems, and confirm our hypothesis that to obtain realistic and accurate results, ongoing research on social network applications should use real indicators of user interactions in lieu of social graphs.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weerkamp:2012:EEC, author = "Wouter Weerkamp and Krisztian Balog and Maarten de Rijke", title = "Exploiting External Collections for Query Expansion", journal = j-TWEB, volume = "6", number = "4", pages = "18:1--18:??", month = nov, year = "2012", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2382616.2382621", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A persisting challenge in the field of information retrieval is the vocabulary mismatch between a user's information need and the relevant documents. One way of addressing this issue is to apply query modeling: to add terms to the original query and reweigh the terms. In social media, where documents usually contain creative and noisy language (e.g., spelling and grammatical errors), query modeling proves difficult. To address this, attempts to use external sources for query modeling have been made and seem to be successful. In this article we propose a general generative query expansion model that uses external document collections for term generation: the External Expansion Model (EEM). The main rationale behind our model is our hypothesis that each query requires its own mixture of external collections for expansion and that an expansion model should account for this. 
For some queries we expect, for example, a news collection to be most beneficial, while for other queries we could benefit more by selecting terms from a general encyclopedia. EEM allows for query-dependent weighing of the external collections. We put our model to the test on the task of blog post retrieval and we use four external collections in our experiments: (i) a news collection, (ii) a Web collection, (iii) Wikipedia, and (iv) a blog post collection. Experiments show that EEM outperforms query expansion on the individual collections, as well as the Mixture of Relevance Models that was previously proposed by Diaz and Metzler [2006]. Extensive analysis of the results shows that our naive approach to estimating query-dependent collection importance works reasonably well and that, when we use ``oracle'' settings, we see the full potential of our model. We also find that the query-dependent collection importance has more impact on retrieval performance than the independent collection importance (i.e., a collection prior).", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2013:MVC, author = "Ou Wu and Weiming Hu and Lei Shi", title = "Measuring the Visual Complexities of {Web} Pages", journal = j-TWEB, volume = "7", number = "1", pages = "1:1--1:??", month = mar, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2435215.2435216", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Visual complexities (VisComs) of Web pages significantly affect user experience, and automatic evaluation can facilitate a large number of Web-based applications. 
The construction of a model for measuring the VisComs of Web pages requires the extraction of typical features and learning based on labeled Web pages. However, as far as the authors are aware, little headway has been made on measuring VisCom in Web mining and machine learning. The present article provides a new approach combining Web mining techniques and machine learning algorithms for measuring the VisComs of Web pages. The structure of a Web page is first analyzed, and the layout is then extracted. Using a Web page as a semistructured image, three classes of features are extracted to construct a feature vector. The feature vector is fed into a learned measuring function to calculate the VisCom of the page. In the proposed approach of the present study, the type of the measuring function and its learning depend on the quantification strategy for VisCom. Aside from using a category and a score to represent VisCom as existing work, this study presents a new strategy utilizing a distribution to quantify the VisCom of a Web page. Empirical evaluation suggests the effectiveness of the proposed approach in terms of both features and learning algorithms.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hanson:2013:PWA, author = "Vicki L. Hanson and John T. 
Richards", title = "Progress on {Website} Accessibility?", journal = j-TWEB, volume = "7", number = "1", pages = "2:1--2:??", month = mar, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2435215.2435217", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Over 100 top-traffic and government websites from the United States and United Kingdom were examined for evidence of changes on accessibility indicators over the 14-year period from 1999 to 2012, the longest period studied to date. Automated analyses of WCAG 2.0 Level A Success Criteria found high percentages of violations overall. Unlike more circumscribed studies, however, these sites exhibited improvements over the years on a number of accessibility indicators, with government sites being less likely than topsites to have accessibility violations. Examination of the causes of success and failure suggests that improving accessibility may be due, in part, to changes in website technologies and coding practices rather than a focus on accessibility per se.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Baykan:2013:CST, author = "Eda Baykan and Monika Henzinger and Ingmar Weber", title = "A Comprehensive Study of Techniques for {URL}-Based {Web} Page Language Classification", journal = j-TWEB, volume = "7", number = "1", pages = "3:1--3:??", month = mar, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2435215.2435218", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given only the URL of a Web page, can we identify its language? 
In this article we examine this question. URL-based language classification is useful when the content of the Web page is not available or downloading the content is a waste of bandwidth and time. We built URL-based language classifiers for English, German, French, Spanish, and Italian by applying a variety of algorithms and features. As algorithms we used machine learning algorithms which are widely applied for text classification and state-of-art algorithms for language identification of text. As features we used words, various sized n-grams, and custom-made features (our novel feature set). We compared our approaches with two baseline methods, namely classification by country code top-level domains and classification by IP addresses of the hosting Web servers. We trained and tested our classifiers in a 10-fold cross-validation setup on a dataset obtained from the Open Directory Project and from querying a commercial search engine. We obtained the lowest F1-measure for English (94) and the highest F1-measure for German (98) with the best performing classifiers. We also evaluated the performance of our methods: (i) on a set of Web pages written in Adobe Flash and (ii) as part of a language-focused crawler. In the first case, the content of the Web page is hard to extract and in the second case downloading pages of the ``wrong'' language constitutes a waste of bandwidth. 
In both settings the best classifiers have a high accuracy with an F1-measure between 95 (for English) and 98 (for Italian) for the Adobe Flash pages and a precision between 90 (for Italian) and 97 (for French) for the language-focused crawler.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Marriott:2013:HAT, author = "Kim Marriott and Peter Moulder and Nathan Hurst", title = "{HTML} Automatic Table Layout", journal = j-TWEB, volume = "7", number = "1", pages = "4:1--4:??", month = mar, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2435215.2435219", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Automatic layout of tables is required in online applications because of the need to tailor the layout to the viewport width, choice of font, and dynamic content. However, if the table contains text, minimizing the height of the table for a fixed maximum width is NP-hard. Thus, more efficient heuristic algorithms are required. We evaluate the HTML table layout recommendation and find that while it generally produces quite compact layout it is brittle and can lead to quite uncompact layout. We present an alternate heuristic algorithm. It uses a greedy strategy that starts from the widest reasonable layout and repeatedly chooses to narrow the column for which narrowing leads to the least increase in table height. 
The algorithm is simple, fast enough to be used in online applications, and gives significantly more compact layout than is obtained with HTML's recommended table layout algorithm.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anisetti:2013:TBS, author = "Marco Anisetti and Claudio A. Ardagna and Ernesto Damiani and Francesco Saonara", title = "A test-based security certification scheme for {Web} services", journal = j-TWEB, volume = "7", number = "2", pages = "5:1--5:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460384", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Service-Oriented Architecture (SOA) paradigm is giving rise to a new generation of applications built by dynamically composing loosely coupled autonomous services. Clients (i.e., software agents acting on behalf of human users or service providers) implementing such complex applications typically search and integrate services on the basis of their functional requirements and of their trust in the service suppliers. A major issue in this scenario relates to the definition of an assurance technique allowing clients to select services on the basis of their nonfunctional requirements and increasing their confidence that the selected services will satisfy such requirements. In this article, we first present an assurance solution that focuses on security and supports a test-based security certification scheme for Web services. The certification scheme is driven by the security properties to be certified and relies upon a formal definition of the service model. 
The evidence supporting a certified property is computed using a model-based testing approach that, starting from the service model, automatically generates the test cases to be used in the service certification. We also define a set of indexes and metrics that evaluate the assurance level and the quality of the certification process. Finally, we present our evaluation toolkit and experimental results obtained applying our certification solution to a financial service implementing the Interactive Financial eXchange (IFX) standard.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Victor:2013:ETB, author = "Patricia Victor and Nele Verbiest and Chris Cornelis and Martine {De Cock}", title = "Enhancing the trust-based recommendation process with explicit distrust", journal = j-TWEB, volume = "7", number = "2", pages = "6:1--6:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460385", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "When a Web application with a built-in recommender offers a social networking component which enables its users to form a trust network, it can generate more personalized recommendations by combining user ratings with information from the trust network. These are the so-called trust-enhanced recommendation systems. While research on the incorporation of trust for recommendations is thriving, the potential of explicitly stated distrust remains almost unexplored. In this article, we introduce a distrust-enhanced recommendation algorithm which has its roots in Golbeck's trust-based weighted mean. 
Through experiments on a set of reviews from Epinions.com, we show that our new algorithm outperforms its standard trust-only counterpart with respect to accuracy, thereby demonstrating the positive effect that explicit distrust can have on trust-based recommendations.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Yue:2013:MSI, author = "Chuan Yue and Haining Wang", title = "A measurement study of insecure {JavaScript} practices on the {Web}", journal = j-TWEB, volume = "7", number = "2", pages = "7:1--7:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460386", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "JavaScript is an interpreted programming language most often used for enhancing webpage interactivity and functionality. It has powerful capabilities to interact with webpage documents and browser windows, however, it has also opened the door for many browser-based security attacks. Insecure engineering practices of using JavaScript may not directly lead to security breaches, but they can create new attack vectors and greatly increase the risks of browser-based attacks. In this article, we present the first measurement study on insecure practices of using JavaScript on the Web. Our focus is on the insecure practices of JavaScript inclusion and dynamic generation, and we examine their severity and nature on 6,805 unique websites. 
Our measurement results reveal that insecure JavaScript practices are common at various websites: (1) at least 66.4\% of the measured websites manifest the insecure practices of including JavaScript files from external domains into the top-level documents of their webpages; (2) over 44.4\% of the measured websites use the dangerous eval() function to dynamically generate and execute JavaScript code on their webpages; and (3) in JavaScript dynamic generation, using the document.write() method and the innerHTML property is much more popular than using the relatively secure technique of creating script elements via DOM methods. Our analysis indicates that safe alternatives to these insecure practices exist in common cases and ought to be adopted by website developers and administrators for reducing potential security risks.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2013:UQI, author = "Weifeng Su and Hejun Wu and Yafei Li and Jing Zhao and Frederick H. Lochovsky and Hongmin Cai and Tianqiang Huang", title = "Understanding query interfaces by statistical parsing", journal = j-TWEB, volume = "7", number = "2", pages = "8:1--8:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460387", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Users submit queries to an online database via its query interface. Query interface parsing, which is important for many applications, understands the query capabilities of a query interface. Since most query interfaces are organized hierarchically, we present a novel query interface parsing method, StatParser (Statistical Parser), to automatically extract the hierarchical query capabilities of query interfaces. 
StatParser automatically learns from a set of parsed query interfaces and parses new query interfaces. StatParser starts from a small grammar and enhances the grammar with a set of probabilities learned from parsed query interfaces under the maximum-entropy principle. Given a new query interface, the probability-enhanced grammar identifies the parse tree with the largest global probability to be the query capabilities of the query interface. Experimental results show that StatParser very accurately extracts the query capabilities and can effectively overcome the problems of existing query interface parsers.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Diaz:2013:LEU, author = "Oscar D{\'\i}az and Crist{\'o}bal Arellano and Maider Azanza", title = "A language for end-user {Web} augmentation: Caring for producers and consumers alike", journal = j-TWEB, volume = "7", number = "2", pages = "9:1--9:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460388", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web augmentation is to the Web what augmented reality is to the physical world: layering relevant content/layout/navigation over the existing Web to customize the user experience. This is achieved through JavaScript (JS) using browser weavers (e.g., Greasemonkey). To date, over 43 million of downloads of Greasemonkey scripts ground the vitality of this movement. However, Web augmentation is hindered by being programming intensive and prone to malware. This prevents end-users from participating as both producers and consumers of scripts: producers need to know JS, consumers need to trust JS. 
This article aims at promoting end-user participation in both roles. The vision is for end-users to prosume (the act of simultaneously caring for producing and consuming) scripts as easily as they currently prosume their pictures or videos. Encouraging production requires more ``natural'' and abstract constructs. Promoting consumption calls for augmentation scripts to be easier to understand, share, and trust upon. To this end, we explore the use of Domain-Specific Languages (DSLs) by introducing Sticklet. Sticklet is an internal DSL on JS, where JS generality is reduced for the sake of learnability and reliability. Specifically, Web augmentation is conceived as fixing in existing web sites (i.e., the ``wall'') HTML fragments extracted from either other sites or Web services (i.e., the ``stickers''). Sticklet targets hobby programmers as producers, and computer literates as consumers. From a producer perspective, benefits are threefold. As a restricted grammar on top of JS, Sticklet expressions are domain oriented and more declarative than their JS counterparts, hence speeding up development. As syntactically correct JS expressions, Sticklet scripts can be installed as traditional scripts and hence, programmers can continue using existing JS tools. As declarative expressions, they are easier to maintain, and amenable for optimization. From a consumer perspective, domain specificity brings understandability (due to declarativeness), reliability (due to built-in security), and ``consumability'' (i.e., installation/enactment/sharing of Sticklet expressions are tuned to the shortage of time and skills of the target audience). Preliminary evaluations indicate that 77\% of the subjects were able to develop new Sticklet scripts in less than thirty minutes while 84\% were able to consume these scripts in less than ten minutes. 
Sticklet is available to download as a Mozilla add-on.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kaldeli:2013:CWS, author = "Eirini Kaldeli and Ehsan Ullah Warriach and Alexander Lazovik and Marco Aiello", title = "Coordinating the web of services for a smart home", journal = j-TWEB, volume = "7", number = "2", pages = "10:1--10:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460389", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Domotics, concerned with the realization of intelligent home environments, is a novel field which can highly benefit from solutions inspired by service-oriented principles to enhance the convenience and security of modern home residents. In this work, we present an architecture for a smart home, starting from the lower device interconnectivity level up to the higher application layers that undertake the load of complex functionalities and provide a number of services to end-users. We claim that in order for smart homes to exhibit a genuinely intelligent behavior, the ability to compute compositions of individual devices automatically and dynamically is paramount. To this end, we incorporate into the architecture a composition component that employs artificial intelligence domain-independent planning to generate compositions at runtime, in a constantly evolving environment. We have implemented a fully working prototype that realizes such an architecture, and have evaluated it both in terms of performance as well as from the end-user point of view. 
The results of the evaluation show that the service-oriented architectural design and the support for dynamic compositions is quite efficient from the technical point of view, and that the system succeeds in satisfying the expectations and objectives of the users.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Balakrishnan:2013:ART, author = "Raju Balakrishnan and Subbarao Kambhampati and Manishkumar Jha", title = "Assessing relevance and trust of the deep web sources and results based on inter-source agreement", journal = j-TWEB, volume = "7", number = "2", pages = "11:1--11:??", month = may, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2460383.2460390", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Deep web search engines face the formidable challenge of retrieving high-quality results from the vast collection of searchable databases. Deep web search is a two-step process of selecting the high-quality sources and ranking the results from the selected sources. Though there are existing methods for both the steps, they assess the relevance of the sources and the results using the query-result similarity. When applied to the deep web these methods have two deficiencies. First is that they are agnostic to the correctness (trustworthiness) of the results. Second, the query-based relevance does not consider the importance of the results and sources. These two considerations are essential for the deep web and open collections in general. Since a number of deep web sources provide answers to any query, we conjuncture that the agreements between these answers are helpful in assessing the importance and the trustworthiness of the sources and the results. 
For assessing source quality, we compute the agreement between the sources as the agreement of the answers returned. While computing the agreement, we also measure and compensate for the possible collusion between the sources. This adjusted agreement is modeled as a graph with sources at the vertices. On this agreement graph, a quality score of a source, that we call SourceRank, is calculated as the stationary visit probability of a random walk. For ranking results, we analyze the second-order agreement between the results. Further extending SourceRank to multidomain search, we propose a source ranking sensitive to the query domains. Multiple domain-specific rankings of a source are computed, and these ranks are combined for the final ranking. We perform extensive evaluations on online and hundreds of Google Base sources spanning across domains. The proposed result and source rankings are implemented in the deep web search engine Factal. We demonstrate that the agreement analysis tracks source corruption. Further, our relevance evaluations show that our methods improve precision significantly over Google Base and the other baseline methods. 
The result ranking and the domain-specific source ranking are evaluated separately.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Nguyen:2013:FWT, author = "Cam-Tu Nguyen and Natsuda Kaothanthong and Takeshi Tokuyama and Xuan-Hieu Phan", title = "A feature-word-topic model for image annotation and retrieval", journal = j-TWEB, volume = "7", number = "3", pages = "12:1--12:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516634", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Image annotation is a process of finding appropriate semantic labels for images in order to obtain a more convenient way for indexing and searching images on the Web. This article proposes a novel method for image annotation based on combining feature-word distributions, which map from visual space to word space, and word-topic distributions, which form a structure to capture label relationships for annotation. We refer to this type of model as Feature-Word-Topic models. The introduction of topics allows us to efficiently take word associations, such as {ocean, fish, coral} or {desert, sand, cactus}, into account for image annotation. Unlike previous topic-based methods, we do not consider topics as joint distributions of words and visual features, but as distributions of words only. Feature-word distributions are utilized to define weights in computation of topic distributions for annotation. By doing so, topic models in text mining can be applied directly in our method. 
Our Feature-word-topic model, which exploits Gaussian Mixtures for feature-word distributions, and probabilistic Latent Semantic Analysis (pLSA) for word-topic distributions, shows that our method is able to obtain promising results in image annotation and retrieval.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vargiu:2013:ICA, author = "Eloisa Vargiu and Alessandro Giuliani and Giuliano Armano", title = "Improving contextual advertising by adopting collaborative filtering", journal = j-TWEB, volume = "7", number = "3", pages = "13:1--13:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516635", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising can be viewed as an information filtering task aimed at selecting suitable ads to be suggested to the ``final user'', that is, the Web page in hand. Starting from this insight, in this article we propose a novel system, which adopts a collaborative filtering approach to perform contextual advertising. In particular, given a Web page, the system relies on collaborative filtering to classify the page content and to suggest suitable ads accordingly. Useful information is extracted from ``inlinks'', that is, similar pages that link to the Web page in hand. In so doing, collaborative filtering is used in a content-based setting, giving rise to a hybrid contextual advertising system. After being implemented, the system has been experimented with about 15000 Web pages extracted from the Open Directory Project. Comparative experiments with a content-based system have been performed. The corresponding results highlight that the proposed system performs better. 
A suitable case study is also provided to enable the reader to better understand how the system works and its effectiveness.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Conti:2013:VPS, author = "Mauro Conti and Arbnor Hasani and Bruno Crispo", title = "Virtual private social networks and a {Facebook} implementation", journal = j-TWEB, volume = "7", number = "3", pages = "14:1--14:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516636", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The popularity of Social Networking Sites (SNS) is growing rapidly, with the largest sites serving hundreds of millions of users and their private information. The privacy settings of these SNSs do not allow the user to avoid sharing some information (e.g., name and profile picture) with all the other users. Also, no matter the privacy settings, this information is always shared with the SNS (that could sell this information or be hacked). To mitigate these threats, we recently introduced the concept of Virtual Private Social Networks (VPSNs). In this work we propose the first complete architecture and implementation of VPSNs for Facebook. In particular, we address an important problem left unexplored in our previous research-that is the automatic propagation of updated profiles to all the members of the same VPSN. Furthermore, we made an in-depth study on performance and implemented several optimization to reduce the impact of VPSN on user experience. 
The proposed solution is lightweight, completely distributed, does not depend on the collaboration from Facebook, does not have a central point of failure, it offers (with some limitations) the same functionality as Facebook, and apart from some simple settings, the solution is almost transparent to the user. Through experiments, with an extended set of parameters, we have confirmed the feasibility of the proposal and have shown a very limited time-overhead experienced by the user while browsing Facebook pages.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2013:TBI, author = "B. Barla Cambazoglu and Enver Kayaaslan and Simon Jonassen and Cevdet Aykanat", title = "A term-based inverted index partitioning model for efficient distributed query processing", journal = j-TWEB, volume = "7", number = "3", pages = "15:1--15:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516637", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In a shared-nothing, distributed text retrieval system, queries are processed over an inverted index that is partitioned among a number of index servers. In practice, the index is either document-based or term-based partitioned. This choice is made depending on the properties of the underlying hardware infrastructure, query traffic distribution, and some performance and availability constraints. In query processing on retrieval systems that adopt a term-based index partitioning strategy, the high communication overhead due to the transfer of large amounts of data from the index servers forms a major performance bottleneck, deteriorating the scalability of the entire distributed retrieval system. 
In this work, to alleviate this problem, we propose a novel inverted index partitioning model that relies on hypergraph partitioning. In the proposed model, concurrently accessed index entries are assigned to the same index servers, based on the inverted index access patterns extracted from the past query logs. The model aims to minimize the communication overhead that will be incurred by future queries while maintaining the computational load balance among the index servers. We evaluate the performance of the proposed model through extensive experiments using a real-life text collection and a search query sample. Our results show that considerable performance gains can be achieved relative to the term-based index partitioning strategies previously proposed in literature. In most cases, however, the performance remains inferior to that attained by document-based partitioning.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weninger:2013:PPF, author = "Tim Weninger and Thomas J. Johnston and Jiawei Han", title = "The parallel path framework for entity discovery on the web", journal = j-TWEB, volume = "7", number = "3", pages = "16:1--16:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516638", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It has been a dream of the database and Web communities to reconcile the unstructured nature of the World Wide Web with the neat, structured schemas of the database paradigm. Even though databases are currently used to generate Web content in some sites, the schemas of these databases are rarely consistent across a domain. 
This makes the comparison and aggregation of information from different domains difficult. We aim to make an important step towards resolving this disparity by using the structural and relational information on the Web to (1) extract Web lists, (2) find entity-pages, (3) map entity-pages to a database, and (4) extract attributes of the entities. Specifically, given a Web site and an entity-page (e.g., university department and faculty member home page) we seek to find all of the entity-pages of the same type (e.g., all faculty members in the department), as well as attributes of the specific entities (e.g., their phone numbers, email addresses, office numbers). To do this, we propose a Web structure mining method which grows parallel paths through the Web graph and DOM trees and propagates relevant attribute information forward. We show that by utilizing these parallel paths we can efficiently discover entity-pages and attributes. Finally, we demonstrate the accuracy of our method with a large case study.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2013:SCB, author = "Liwei Liu and Freddy Lecue and Nikolay Mehandjiev", title = "Semantic content-based recommendation of software services using context", journal = j-TWEB, volume = "7", number = "3", pages = "17:1--17:??", month = sep, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2516633.2516639", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The current proliferation of software services means users should be supported when selecting one service out of the many which meet their needs. 
Recommender Systems provide such support for selecting products and conventional services, yet their direct application to software services is not straightforward, because of the current scarcity of available user feedback, and the need to fine-tune software services to the context of intended use. In this article, we address these issues by proposing a semantic content-based recommendation approach that analyzes the context of intended service use to provide effective recommendations in conditions of scarce user feedback. The article ends with two experiments based on a realistic set of semantic services. The first experiment demonstrates how the proposed semantic content-based approach can produce effective recommendations using semantic reasoning over service specifications by comparing it with three other approaches. The second experiment demonstrates the effectiveness of the proposed context analysis mechanism by comparing the performance of both context-aware and plain versions of our semantic content-based approach, benchmarked against user-performed selection informed by context.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jiang:2013:ULI, author = "Jing Jiang and Christo Wilson and Xiao Wang and Wenpeng Sha and Peng Huang and Yafei Dai and Ben Y. Zhao", title = "Understanding latent interactions in online social networks", journal = j-TWEB, volume = "7", number = "4", pages = "18:1--18:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2517040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Popular online social networks (OSNs) like Facebook and Twitter are changing the way users communicate and interact with the Internet. 
A deep understanding of user interactions in OSNs can provide important insights into questions of human social behavior and into the design of social platforms and applications. However, recent studies have shown that a majority of user interactions on OSNs are latent interactions, that is, passive actions, such as profile browsing, that cannot be observed by traditional measurement techniques. In this article, we seek a deeper understanding of both active and latent user interactions in OSNs. For quantifiable data on latent user interactions, we perform a detailed measurement study on Renren, the largest OSN in China with more than 220 million users to date. All friendship links in Renren are public, allowing us to exhaustively crawl a connected graph component of 42 million users and 1.66 billion social links in 2009. Renren also keeps detailed, publicly viewable visitor logs for each user profile. We capture detailed histories of profile visits over a period of 90 days for users in the Peking University Renren network and use statistics of profile visits to study issues of user profile popularity, reciprocity of profile visits, and the impact of content updates on user popularity. We find that latent interactions are much more prevalent and frequent than active events, are nonreciprocal in nature, and that profile popularity is correlated with page views of content rather than with quantity of content updates. 
Finally, we construct latent interaction graphs as models of user browsing behavior and compare their structural properties, evolution, community structure, and mixing times against those of both active interaction graphs and social graphs.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Quarteroni:2013:BKA, author = "Silvia Quarteroni and Marco Brambilla and Stefano Ceri", title = "A bottom-up, knowledge-aware approach to integrating and querying {Web} data services", journal = j-TWEB, volume = "7", number = "4", pages = "19:1--19:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2493536", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As a wealth of data services is becoming available on the Web, building and querying Web applications that effectively integrate their content is increasingly important. However, schema integration and ontology matching with the aim of registering data services often requires a knowledge-intensive, tedious, and error-prone manual process. We tackle this issue by presenting a bottom-up, semi-automatic service registration process that refers to an external knowledge base and uses simple text processing techniques in order to minimize and possibly avoid the contribution of domain experts in the annotation of data services. The first by-product of this process is a representation of the domain of data services as an entity-relationship diagram, whose entities are named after concepts of the external knowledge base matching service terminology rather than being manually created to accommodate an application-specific ontology. 
Second, a three-layer annotation of service semantics (service interfaces, access patterns, service marts) describing how services ``play'' with such domain elements is also automatically constructed at registration time. When evaluated against heterogeneous existing data services and with a synthetic service dataset constructed using Google Fusion Tables, the approach yields good results in terms of data representation accuracy. We subsequently demonstrate that natural language processing methods can be used to decompose and match simple queries to the data services represented in three layers according to the preceding methodology with satisfactory results. We show how semantic annotations are used at query time to convert the user's request into an executable logical query. Globally, our findings show that the proposed registration method is effective in creating a uniform semantic representation of data services, suitable for building Web applications and answering search queries.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Leiva:2013:WBB, author = "Luis A. Leiva and Roberto Viv{\'o}", title = "{Web} browsing behavior analysis and interactive hypervideo", journal = j-TWEB, volume = "7", number = "4", pages = "20:1--20:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2529995.2529996", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Processing data on any sort of user interaction is well known to be cumbersome and mostly time consuming. In order to assist researchers in easily inspecting fine-grained browsing data, current tools usually display user interactions as mouse cursor tracks, a video-like visualization scheme. 
However, to date, traditional online video inspection has not explored the full capabilities of hypermedia and interactive techniques. In response to this need, we have developed SMT2 \epsilon , a Web-based tracking system for analyzing browsing behavior using feature-rich hypervideo visualizations. We compare our system to related work in academia and the industry, showing that ours features unprecedented visualization capabilities. We also show that SMT2 \epsilon efficiently captures browsing data and is perceived by users to be both helpful and usable. A series of prediction experiments illustrate that raw cursor data are accessible and can be easily handled, providing evidence that the data can be used to construct and verify research hypotheses. Considering its limitations, it is our hope that SMT2 \epsilon will assist researchers, usability practitioners, and other professionals interested in understanding how users browse the Web.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bing:2013:RDS, author = "Lidong Bing and Wai Lam and Tak-Lam Wong", title = "Robust detection of semi-structured web records using a {DOM} structure-knowledge-driven model", journal = j-TWEB, volume = "7", number = "4", pages = "21:1--21:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2508434", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web data record extraction aims at extracting a set of similar object records from a single webpage. These records have similar attributes or fields and are presented with a regular format in a coherent region of the page. To tackle this problem, most existing works analyze the DOM tree of an input page. 
One major limitation of these methods is that the lack of a global view in detecting data records from an input page results in a myopic decision. Their brute-force searching manner in detecting various types of records degrades the flexibility and robustness. We propose a Structure-Knowledge-Oriented Global Analysis (Skoga) framework which can perform robust detection of different-kinds of data records and record regions. The major component of the Skoga framework is a DOM structure-knowledge-driven detection model which can conduct a global analysis on the DOM structure to achieve effective detection. The DOM structure knowledge consists of background knowledge as well as statistical knowledge capturing different characteristics of data records and record regions, as exhibited in the DOM structure. The background knowledge encodes the semantics of labels indicating general constituents of data records and regions. The statistical knowledge is represented by some carefully designed features that capture different characteristics of a single node or a node group in the DOM. The feature weights are determined using a development dataset via a parameter estimation algorithm based on a structured output support vector machine. An optimization method based on the divide-and-conquer principle is developed making use of the DOM structure knowledge to quantitatively infer and recognize appropriate records and regions for a page. Extensive experiments have been conducted on four datasets. 
The experimental results demonstrate that our framework achieves higher accuracy compared with state-of-the-art methods.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liao:2013:VAC, author = "Zhen Liao and Daxin Jiang and Jian Pei and Yalou Huang and Enhong Chen and Huanhuan Cao and Hang Li", title = "A {vlHMM} approach to context-aware search", journal = j-TWEB, volume = "7", number = "4", pages = "22:1--22:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2490255", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Capturing the context of a user's query from the previous queries and clicks in the same session leads to a better understanding of the user's information need. A context-aware approach to document reranking, URL recommendation, and query suggestion may substantially improve users' search experience. In this article, we propose a general approach to context-aware search by learning a variable length hidden Markov model ( vlHMM ) from search sessions extracted from log data. While the mathematical model is powerful, the huge amounts of log data present great challenges. We develop several distributed learning techniques to learn a very large vlHMM under the map-reduce framework. Moreover, we construct feature vectors for each state of the vlHMM model to handle users' novel queries not covered by the training data. We test our approach on a raw dataset consisting of 1.9 billion queries, 2.9 billion clicks, and 1.2 billion search sessions before filtering, and evaluate the effectiveness of the vlHMM learned from the real data on three search applications: document reranking, query suggestion, and URL recommendation. 
The experiment results validate the effectiveness of vlHMM in the applications of document reranking, URL recommendation, and query suggestion.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{White:2013:CBD, author = "Ryen W. White and Eric Horvitz", title = "Captions and biases in diagnostic search", journal = j-TWEB, volume = "7", number = "4", pages = "23:1--23:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2486040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "People frequently turn to the Web with the goal of diagnosing medical symptoms. Studies have shown that diagnostic search can often lead to anxiety about the possibility that symptoms are explained by the presence of rare, serious medical disorders, rather than far more common benign syndromes. We study the influence of the appearance of potentially-alarming content, such as severe illnesses or serious treatment options associated with the queried for symptoms, in captions comprising titles, snippets, and URLs. We explore whether users are drawn to results with potentially-alarming caption content, and if so, the implications of such attraction for the design of search engines. We specifically study the influence of the content of search result captions shown in response to symptom searches on search-result click-through behavior. We show that users are significantly more likely to examine and click on captions containing potentially-alarming medical terminology such as heart attack'' or medical emergency'' independent of result rank position and well-known positional biases in users' search examination behaviors. 
The findings provide insights about the possible effects of displaying implicit correlates of searchers' goals in search-result captions, such as unexpressed concerns and fears. As an illustration of the potential utility of these results, we developed and evaluated an enhanced click prediction model that incorporates potentially-alarming caption features and show that it significantly outperforms models that ignore caption content. Beyond providing additional understanding of the effects of Web content on medical concerns, the methods and findings have implications for search engine design. As part of our discussion on the implications of this research, we propose procedures for generating more representative captions that may be less likely to cause alarm, as well as methods for learning to more appropriately rank search results from logged search behavior, for examples, by also considering the presence of potentially-alarming content in the captions that motivate observed clicks and down-weighting clicks seemingly driven by searchers' health anxieties.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lee:2013:SCA, author = "Jung-Hyun Lee and Jongwoo Ha and Jin-Yong Jung and Sangkeun Lee", title = "Semantic contextual advertising based on the open directory project", journal = j-TWEB, volume = "7", number = "4", pages = "24:1--24:??", month = oct, year = "2013", CODEN = "????", DOI = "http://dx.doi.org/10.1145/2529995.2529997", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising seeks to place relevant textual ads within the content of generic webpages. In this article, we explore a novel semantic approach to contextual advertising. 
This consists of three tasks: (1) building a well-organized hierarchical taxonomy of topics, (2) developing a robust classifier for effectively finding the topics of pages and ads, and (3) ranking ads based on the topical relevance to pages. First, we heuristically build our own taxonomy of topics from the Open Directory Project (ODP). Second, we investigate how to increase classification accuracy by taking the unique characteristics of the ODP into account. Last, we measure the topical relevance of ads by applying a link analysis technique to the similarity graph carefully derived from our taxonomy. Experiments show that our classification method improves the performance of Ma-F$_1$ by as much
as 25.7\% over the baseline classifier. In addition,
our ranking method enhances the relevance of ads
substantially, up to 10\% in terms of precision at k,
compared to a representative strategy.",
acknowledgement = ack-nhfb,
articleno =    "24",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Huang:2013:UEQ,
author =       "Xiaodi Huang",
title =        "{UsageQoS}: Estimating the {QoS} of {Web} Services
through Online User Communities",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "1:1--1:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2532635",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Services are an indispensable component in cloud
computing. Web services are particularly important. As
an increasing number of Web services provides
equivalent functions, one common issue faced by users
is the selection of the most appropriate one based on
quality. This article presents a framework
that characterizes the quality of Web services, an
algorithm that quantifies them, and a system
architecture that ranks Web services by using the
proposed algorithm. In particular, the algorithm,
called UsageQoS that computes the scores of quality of
service (QoS) of Web services within a community, makes
use of the usage frequencies of Web services. The
frequencies are defined as the numbers of times invoked
by other services in a given time period. The UsageQoS
algorithm is able to optionally take user ratings as
its initial input. The proposed approach has been
validated by extensively experimenting on several
datasets, including two real datasets. The results of
the experiments have demonstrated that our approach is
capable of estimating QoS parameters of Web services,
regardless of whether user ratings are available or
not.",
acknowledgement = ack-nhfb,
articleno =    "1",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Weber:2013:FBW,
author =       "Ingo Weber and Hye-Young Paik and Boualem Benatallah",
title =        "Form-Based {Web} Service Composition for Domain
Experts",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "2:1--2:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2542168",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "In many cases, it is not cost effective to automate
business processes which affect a small number of
people and/or change frequently. We present a novel
approach for enabling domain experts to model and
deploy such processes from their respective domain as
Web service compositions. The approach builds on
user-editable service naming and representing Web
services as forms. On this basis, the approach provides
a visual composition language with a targeted
restriction of control-flow expressivity, process
simulation, automated process verification mechanisms,
and code generation for executing orchestrations. A
Web-based service composition prototype implements this
approach, including a WS-BPEL code generator. A small
lab user study with 14 participants showed promising
results for the usability of the system, even for
nontechnical domain experts.",
acknowledgement = ack-nhfb,
articleno =    "2",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ozcan:2013:SCH,
author =       "Rifat Ozcan and Ismail Sengor Altingovde and B. Barla
Cambazoglu and {\"O}zg{\"u}r Ulusoy",
title =        "Second Chance: a Hybrid Approach for Dynamic Result
Caching and Prefetching in Search Engines",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "3:1--3:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2536777",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Web search engines are known to cache the results of
previously issued queries. The stored results typically
contain the document summaries and some data that is
used to construct the final search result page returned
to the user. An alternative strategy is to store in the
cache only the result document IDs, which take much
less space, allowing results of more queries to be
cached. These two strategies lead to an interesting
trade-off between the hit rate and the average query
response latency. In this work, in order to exploit
this trade-off, we propose a hybrid result caching
strategy where a dynamic result cache is split into two
sections: an HTML cache and a docID cache. Moreover,
using a realistic cost model, we evaluate the
performance of different result prefetching strategies
for the proposed hybrid cache and the baseline
HTML-only cache. Finally, we propose a machine learning
approach to predict singleton queries, which occur only
once in the query stream. We show that when the
proposed hybrid result caching strategy is coupled with
the singleton query predictor, the hit rate is further
improved.",
acknowledgement = ack-nhfb,
articleno =    "3",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Sherkat:2013:ETS,
author =       "Reza Sherkat and Jing Li and Nikos Mamoulis",
title =        "Efficient Time-Stamped Event Sequence Anonymization",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "4:1--4:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2532643",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "With the rapid growth of applications which generate
timestamped sequences (click streams, GPS trajectories,
RFID sequences), sequence anonymization has become an
important problem, in case such data should be
published or shared. Existing trajectory anonymization
techniques disregard the importance of time or the
semantics of events. This article is the first, to
our knowledge, thorough study on time-stamped event
sequence anonymization. We propose a novel and tunable
generalization framework tailored to event sequences.
We generalize time stamps using time intervals and
events using a taxonomy which models the domain
semantics. We consider two scenarios: (i) sharing the
data with a single receiver (the SSR setting), where
the receiver's background knowledge is confined to a
set of time stamps and time generalization suffices,
and (ii) sharing the data with colluding receivers (the
SCR setting), where time generalization should be
combined with event generalization. For both cases, we
propose appropriate anonymization methods that prevent
both user identification and event prediction. To
achieve computational efficiency and scalability, we
propose optimization techniques for both cases using a
utility-based index, compact summaries, fast to compute
bounds for utility, and a novel taxonomy-aware distance
function. Extensive experiments confirm the
effectiveness of our approach compared with state of
the art, in terms of information loss, range query
distortion, and preserving temporal causality patterns.
Furthermore, our experiments demonstrate efficiency and
scalability on large-scale real and synthetic
datasets.",
acknowledgement = ack-nhfb,
articleno =    "4",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bellido:2013:CFP,
author =       "Jesus Bellido and Rosa Alarc{\'o}n and Cesare
Pautasso",
title =        "Control-Flow Patterns for Decentralized {RESTful}
Service Composition",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "5:1--5:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2535911",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The REST architectural style has attracted a lot of
interest from industry due to the nonfunctional
properties it contributes to Web-based solutions.
SOAP/WSDL-based services, on the other hand, provide
tools and methodologies that allow the design and
development of software supporting complex service
arrangements, enabling complex business processes which
make use of well-known control-flow patterns. It is not
clear if and how such patterns should be modeled,
considering RESTful Web services that comply with the
statelessness, uniform interface and hypermedia
constraints. In this article, we analyze a set of
fundamental control-flow patterns in the context of
stateless compositions of RESTful services. We propose
a means of enabling their implementation using the HTTP
protocol and discuss the impact of our design choices
according to key REST architectural principles. We hope
to shed new light on the design of basic building
blocks for RESTful business processes.",
acknowledgement = ack-nhfb,
articleno =    "5",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Chelaru:2013:ADE,
author =       "Sergiu Chelaru and Ismail Sengor Altingovde and Stefan
Siersdorfer and Wolfgang Nejdl",
title =        "Analyzing, Detecting, and Exploiting Sentiment in
{Web} Queries",
journal =      j-TWEB,
volume =       "8",
number =       "1",
pages =        "6:1--6:??",
month =        dec,
year =         "2013",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2535525",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The Web contains an increasing amount of biased and
opinionated documents on politics, products, and
brands. In this article, we present an in-depth
analysis of Web search queries for
controversial topics, focusing on query sentiment. To
this end, we conduct extensive user assessments and
discriminative term analyses, as well as a sentiment
analysis using the SentiWordNet thesaurus, a lexical
resource containing sentiment annotations. Furthermore,
in order to detect the sentiment expressed in queries,
we build different classifiers based on query texts,
query result titles, and snippets. We demonstrate the
virtue of query sentiment detection in two different
use cases. First, we define a query recommendation
scenario that employs sentiment detection of results to
recommend additional queries for polarized queries
issued by search engine users. The second application
scenario is controversial topic discovery, where query
sentiment classifiers are employed to discover
previously unknown topics that trigger both highly
positive and negative opinions among the users of a
search engine. For both use cases, the results of our
evaluations on real-world data are promising and show
the viability and potential of query sentiment analysis
in practical scenarios.",
acknowledgement = ack-nhfb,
articleno =    "6",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Torres:2014:ASB,
author =       "Sergio Duarte Torres and Ingmar Weber and Djoerd
Hiemstra",
title =        "Analysis of Search and Browsing Behavior of Young
Users on the {Web}",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "7:1--7:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2555595",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The Internet is increasingly used by young children
for all kinds of purposes. Nonetheless, there are not
many resources especially designed for children on the
Internet and most of the content online is designed for
grown-up users. This situation is problematic if we
consider the large differences between young users and
adults since their topic interests, computer skills,
and language capabilities evolve rapidly during
childhood. There is little research aimed at exploring
and measuring the difficulties that children encounter
on the Internet when searching for information and
browsing for content. In the first part of this work,
we employed query logs from a commercial search engine
to quantify the difficulties children of different ages
encounter on the Internet and to characterize the
topics that they search for. We employed query metrics
(e.g., the fraction of queries posed in natural
language), session metrics (e.g., the fraction of
abandoned sessions), and click activity (e.g., the
fraction of ad clicks). The search logs were also used
to retrace stages of child development. Concretely, we
looked for changes in interests (e.g., the distribution
of topics searched) and language development (e.g., the
readability of the content accessed and the vocabulary
size). In the second part of this work, we employed
toolbar logs from a commercial search engine to
characterize the browsing behavior of young users,
particularly to understand the activities on the
Internet that trigger search. We quantified the
proportion of browsing and search activity in the
toolbar sessions and we estimated the likelihood of a
user to carry out search on the Web vertical and
multimedia verticals (i.e., videos and images) given
that the previous event is another search event or a
browsing event. We observed that these metrics clearly
demonstrate an increased level of confusion and
unsuccessful search sessions among children. We also
found a clear relation between the reading level of the
clicked pages and characteristics of the users such as
age and educational attainment. In terms of browsing
behavior, children were found to start their activities
on the Internet with a search engine (instead of
directly browsing content) more often than adults. We
also observed a significantly larger amount of browsing
activity for the case of teenager users. Interestingly
we also found that if children visit knowledge-related
Web sites (i.e., information-dense pages such as
Wikipedia articles), they subsequently do more Web
search activity. Moreover, children and
especially teenagers were found to have a greater
tendency to engage in multimedia search, which calls to
improve the aggregation of multimedia results into the
current search result pages.",
acknowledgement = ack-nhfb,
articleno =    "7",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Su:2014:HIY,
author =       "Ao-Jan Su and Y. Charlie Hu and Aleksandar Kuzmanovic
and Cheng-Kok Koh",
title =        "How to Improve Your Search Engine Ranking: Myths and
Reality",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "8:1--8:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2579990",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Search engines have greatly influenced the way people
access information on the Internet, as such engines
provide the preferred entry point to billions of pages
on the Web. Therefore, highly ranked Web pages
generally have higher visibility to people and pushing
the ranking higher has become the top priority for Web
masters. As a matter of fact, Search Engine
Optimization (SEO) has became a sizeable business that
attempts to improve their clients' ranking. Still, the
lack of ways to validate SEO's methods has created
numerous myths and fallacies associated with ranking
algorithms. In this article, we focus on two popular
ranking algorithms, Google's and Bing's, and design, implement,
and evaluate a ranking system to systematically
validate assumptions others have made about these
popular ranking algorithms. We demonstrate that linear
learning models, coupled with a recursive partitioning
ranking scheme, are capable of predicting ranking
results with high accuracy. As an example, we manage to
correctly predict 7 out of the top 10 pages for 78\% of
evaluated keywords. Moreover, for content-only ranking,
our system can correctly predict 9 or more pages out of
the top 10 ones for 77\% of search terms. We show how
our ranking system can be used to reveal the relative
importance of ranking features in a search engine's
ranking function, provide guidelines for SEOs and Web
masters to optimize their Web pages, validate or
disprove new ranking features, and evaluate search
engine ranking results for possible ranking bias.",
acknowledgement = ack-nhfb,
articleno =    "8",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Sirivianos:2014:LSF,
author =       "Michael Sirivianos and Kyungbaek Kim and Jian Wei Gan
and Xiaowei Yang",
title =        "Leveraging Social Feedback to Verify Online Identity
Claims",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "9:1--9:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2543711",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Anonymity is one of the main virtues of the Internet,
as it protects privacy and enables users to express
opinions more freely. However, anonymity hinders the
assessment of the veracity of assertions that online
users make about their identity attributes, such as age
or profession. We propose FaceTrust, a system that uses
online social networks to provide lightweight identity
credentials while preserving a user's anonymity.
FaceTrust employs a ``game with a purpose'' design to
elicit the opinions of the friends of a user about the
user's self-claimed identity attributes, and uses
attack-resistant trust inference to assign veracity
scores to identity attribute assertions. FaceTrust
provides credentials, which a user can use to
corroborate his assertions. We evaluate our proposal
using a live Facebook deployment and simulations on a
crawled social graph. The results show that our
veracity scores are strongly correlated with the ground
truth, even when dishonest users make up a large
fraction of the social network and employ the Sybil
attack.",
acknowledgement = ack-nhfb,
articleno =    "9",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Pugliese:2014:EMM,
author =       "Andrea Pugliese and Matthias Br{\"o}cheler and V. S.
Subrahmanian and Michael Ovelg{\"o}nne",
title =        "Efficient {MultiView} Maintenance under Insertion in
Huge Social Networks",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "10:1--10:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2541290",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Applications to monitor various aspects of social
networks are becoming increasingly popular. For
instance, marketers want to look for semantic patterns
relating to the content of tweets and Facebook posts
relating to their products. Law enforcement agencies
want to track behaviors involving potential criminals
on the Internet by looking for certain patterns of
behavior. Music companies want to track patterns of
spread of illegal music. These applications allow
multiple users to specify patterns of interest and
monitor them in real time as new data gets added to
the network. In this article, we propose
the concept of social network view servers in which all
of these types of applications can be simultaneously
monitored. The patterns of interest are expressed as
views over an underlying graph or social network
database. We show that a given set of views can be
compiled in multiple possible ways to take advantage of
common substructures and define the concept of an
optimal merge. Though finding an optimal merge is shown
to be NP-hard, we develop the AddView algorithm to find very good
merges quickly. We develop a very fast MultiView
algorithm that scalably and efficiently maintains
multiple subgraph views when insertions are made to the
social network database. We show that our algorithm is
correct, study its complexity, and experimentally
demonstrate that our algorithm can scalably handle
updates to hundreds of views on 6 real-world social
network databases with up to 540M edges.",
acknowledgement = ack-nhfb,
articleno =    "10",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bislimovska:2014:TCB,
author =       "Bojana Bislimovska and Alessandro Bozzon and Marco
Brambilla and Piero Fraternali",
title =        "Textual and Content-Based Search in Repositories of
{Web} Application Models",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "11:1--11:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2579991",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Model-driven engineering relies on collections of
models, which are the primary artifacts for software
development. To enable knowledge sharing and reuse,
models need to be managed within repositories, where
examines two different techniques for indexing and
searching model repositories, with a focus on Web
development projects encoded in a domain-specific
language. Keyword-based and content-based search (also
known as query-by-example) are contrasted with respect
to the architecture of the system, the processing of
models and queries, and the way in which metamodel
knowledge can be exploited to improve search. A
thorough experimental evaluation is conducted to
examine what parameter configurations lead to better
accuracy and to offer an insight in what queries are",
acknowledgement = ack-nhfb,
articleno =    "11",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bellogin:2014:NSW,
author =       "Alejandro Bellog{\'\i}n and Pablo Castells and
Iv{\'a}n Cantador",
title =        "Neighbor Selection and Weighting in User-Based
Collaborative Filtering: a Performance Prediction
Approach",
journal =      j-TWEB,
volume =       "8",
number =       "2",
pages =        "12:1--12:??",
month =        mar,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2579993",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "User-based collaborative filtering systems suggest
interesting items to a user relying on similar-minded
people called neighbors. The selection and weighting of
these neighbors characterize the different
recommendation approaches. While standard strategies
perform a neighbor selection based on user
similarities, trust-aware recommendation algorithms
rely on other aspects indicative of user trust and
recommendation problem, generalizing it in terms of
performance prediction techniques, whose goal is to
predict the performance of an information retrieval
system in response to a particular query. We
investigate how to adopt the preceding generalization
to define a unified framework where we conduct an
objective analysis of the effectiveness (predictive
power) of neighbor scoring functions. The proposed
framework enables discriminating whether recommendation
performance improvements are caused by the used
neighbor scoring functions or by the ways these
functions are used in the recommendation computation.
We evaluated our approach with several state-of-the-art
and novel neighbor scoring functions on three publicly
available datasets. By empirically comparing four
neighbor quality metrics and thirteen performance
predictors, we found strong predictive power for some
of the predictors with respect to certain metrics. This
result was then validated by checking the final
performance of recommendation strategies where
predictors are used for selecting and/or weighting user
neighbors. As a result, we have found that, by
measuring the predictive power of neighbor performance
predictors, we are able to anticipate which predictors
are going to perform better in neighbor-scoring-powered
versions of a user-based collaborative filtering
algorithm.",
acknowledgement = ack-nhfb,
articleno =    "12",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Qian:2014:FTD,
author =       "Yi Qian and Sibel Adali",
title =        "Foundations of Trust and Distrust in Networks:
Extended Structural Balance Theory",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "13:1--13:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2628438",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Modeling trust in very large social networks is a hard
problem due to the highly noisy nature of these
networks that span trust relationships from many
different contexts, based on judgments of reliability,
dependability, and competence. Furthermore,
relationships in these networks vary in their level of
extension of structural balance theory as a
foundational theory of trust and distrust in networks.
Our theory preserves the distinctions between trust and
distrust as suggested in the literature, but also
incorporates the notion of relationship strength that
can be expressed as either discrete categorical values,
as pairwise comparisons, or as metric distances. Our
model is novel, has sound social and psychological
basis, and captures the classical balance theory as a
special case. We then propose a convergence model,
describing how an imbalanced network evolves towards
new balance, and formulate the convergence problem of a
social network as a Metric Multidimensional Scaling
(MDS) optimization problem. Finally, we show how the
convergence model can be used to predict edge signs in
social networks and justify our theory through
extensive experiments on real datasets.",
acknowledgement = ack-nhfb,
articleno =    "13",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Soi:2014:CDC,
author =       "Stefano Soi and Florian Daniel and Fabio Casati",
title =        "Conceptual Development of Custom, Domain-Specific
Mashup Platforms",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "14:1--14:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2628439",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Despite the common claim by mashup platforms that they
enable end-users to develop their own software, in
practice end-users still don't develop their own
mashups, as the highly technical or inexistent [sic]
user bases of today's mashup platforms testify. The key
shortcoming of current platforms is their
general-purpose nature, that privileges expressive
power over intuitiveness. In our prior work, we have
demonstrated that a domain-specific mashup approach,
which privileges intuitiveness over expressive power,
has much more potential to enable end-user development
(EUD). The problem is that developing mashup
platforms-domain-specific or not-is complex and time
platforms by their very nature target only a small user
basis, that is, the experts of the target domain, which
makes their development not sustainable if it is not
we aim to make the development of custom,
domain-specific mashup platforms cost-effective. We
describe a mashup tool development kit (MDK) that is
able to automatically generate a mashup platform
(comprising custom mashup and component description
languages and design-time and runtime environments)
from a conceptual design and to provision it as a
service. We equip the kit with a dedicated development
methodology and demonstrate the applicability and
viability of the approach with the help of two case
studies.",
acknowledgement = ack-nhfb,
articleno =    "14",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zhang:2014:PBT,
author =       "Xianchao Zhang and You Wang and Nan Mou and Wenxin
Liang",
title =        "Propagating Both Trust and Distrust with Target
Differentiation for Combating Link-Based {Web} Spam",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "15:1--15:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2628440",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Semi-automatic anti-spam algorithms propagate either
trust through links from a good seed set (e.g.,
seed set (e.g., Anti-TrustRank) to the entire Web.
These kinds of algorithms have shown their powers in
combating link-based Web spam since they integrate both
human judgement and machine intelligence. Nevertheless,
there is still much space for improvement. One issue of
most existing trust/distrust propagation algorithms is
that only trust or distrust is propagated and only a
good seed set or a bad seed set is used. According to
Wu et al. [2006a], a combined usage of both trust and
distrust propagation can lead to better results, and an
effective framework is needed to realize this insight.
Another more serious issue of existing algorithms is
that trust or distrust is propagated in nondifferential
ways, that is, a page propagates its trust or distrust
score uniformly to its neighbors, without considering
whether each neighbor should be trusted or distrusted.
Such kinds of blind propagating schemes are
inconsistent with the original intention of
trust/distrust propagation. However, it seems
impossible to implement differential propagation if
we take the view that each Web page has both a
trustworthy side and an untrustworthy side, and we
thusly assign two scores to each Web page: T-Rank,
scoring the trustworthiness of the page, and D-Rank,
scoring the untrustworthiness of the page. We then
propose an integrated framework that propagates both
trust and distrust. In the framework, the propagation
of T-Rank/D-Rank is penalized by the target's current
D-Rank/T-Rank. In other words, the propagation of
T-Rank/D-Rank is decided by the target's current
(generalized) probability of being
trustworthy/untrustworthy; thus a page propagates more
trust/distrust to a trustworthy/untrustworthy neighbor
than to an untrustworthy/trustworthy neighbor. In this
way, propagating both trust and distrust with target
differentiation is implemented. We use T-Rank scores to
realize spam demotion and D-Rank scores to accomplish
spam detection. The proposed Trust-DistrustRank (TDR)
algorithm regresses to TrustRank and Anti-TrustRank
when the penalty factor is set to 1 and 0,
respectively. Thus TDR could be seen as a combinatorial
generalization of both TrustRank and Anti-TrustRank.
TDR not only makes full use of both trust and distrust
propagation, but also overcomes the disadvantages of
both TrustRank and Anti-TrustRank. Experimental results
on benchmark datasets show that TDR outperforms other
semi-automatic anti-spam algorithms for both spam
demotion and spam detection tasks under various
criteria.",
acknowledgement = ack-nhfb,
articleno =    "15",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Margaritis:2014:ITI,
author =       "Giorgos Margaritis and Stergios V. Anastasiadis",
title =        "Incremental Text Indexing for Fast Disk-Based Search",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "16:1--16:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2560800",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Real-time search requires to incrementally ingest
content updates and almost immediately make them
searchable while serving search queries at low latency.
This is currently feasible for datasets of moderate
size by fully maintaining the index in the main memory
of multiple machines. Instead, disk-based methods for
incremental index maintenance substantially increase
search latency with the index fragmented across
multiple disk locations. For the support of fast search
over disk-based storage, we take a fresh look at
incremental text indexing in the context of current
architectural features. We introduce a greedy method
called Selective Range Flush (SRF) to contiguously
organize the index over disk blocks and dynamically
update it at low cost. We show that SRF requires
substantial experimental effort to tune specific
parameters for performance efficiency. Subsequently, we
propose the Unified Range Flush (URF) method, which is
conceptually simpler than SRF, achieves similar or
better performance with fewer parameters and less
tuning, and is amenable to I/O complexity analysis. We
implement interesting variations of the two methods in
the Proteus prototype search engine that we developed
and do extensive experiments with three different Web
datasets of size up to 1TB. Across different systems,
we show that our methods offer search latency that
matches or reduces up to half the lowest achieved by
existing disk-based methods. In comparison to an
existing method of comparable search latency on the
same system, our methods reduce by a factor of 2.0--2.4
the I/O part of build time and by 21--24\% the total
build time.",
acknowledgement = ack-nhfb,
articleno =    "16",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Siersdorfer:2014:AMC,
author =       "Stefan Siersdorfer and Sergiu Chelaru and Jose {San
Pedro} and Ismail Sengor Altingovde and Wolfgang
Nejdl",
title =        "Analyzing and Mining Comments and Comment Ratings on
the Social {Web}",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "17:1--17:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2628441",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "An analysis of the social video sharing platform
YouTube and the news aggregator Yahoo! News reveals the
presence of vast amounts of community feedback through
comments for published videos and news stories, as well
presents an in-depth study of commenting and comment
rating behavior on a sample of more than 10 million
study, comment ratings are considered first-class
citizens. Their dependencies with textual content,
(e.g., videos and their metadata) are analyzed to
obtain a comprehensive understanding of the community
the applicability of machine learning and data mining
to detect acceptance of comments by the community,
comments likely to trigger discussions, controversial
and polarizing content, and users exhibiting offensive
commenting behavior. Results from this study have
potential application in guiding the design of
community-oriented online discussion platforms.",
acknowledgement = ack-nhfb,
articleno =    "17",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Casteleyn:2014:TYR,
author =       "Sven Casteleyn and Irene Garrig{\'o}s and
Jose-Norberto Maz{\'o}n",
title =        "Ten Years of {Rich Internet Applications}: a
Systematic Mapping Study, and Beyond",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "18:1--18:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2626369",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The term Rich Internet Applications (RIAs) is
generally associated with Web applications that provide
the features and functionality of traditional desktop
applications. Ten years after the introduction of the
term, an ample amount of research has been carried out
to study various aspects of RIAs. It has thus become
essential to summarize this research and provide an
adequate overview. OBJECTIVE. The objective of our
study is to assemble, classify, and analyze all RIA
research performed in the scientific community, thus
providing a consolidated overview thereof, and to
identify well-established topics, trends, and open
research issues. Additionally, we provide a qualitative
discussion of the most interesting findings. This work
therefore serves as a reference work for beginning and
established RIA researchers alike, as well as for
industrial actors that need an introduction in the
field, or seek pointers to (a specific subset of) the
state-of-the-art. METHOD. A systematic mapping study is
performed in order to identify all RIA-related
publications, define a classification scheme, and
categorize, analyze, and discuss the identified
research according to it. RESULTS. Our source
identification phase resulted in 133 relevant,
peer-reviewed publications, published between 2002 and
2011 in a wide variety of venues. They were
subsequently classified according to four facets:
development activity, research topic, contribution
type, and research type. Pie, stacked bar, and bubble
charts were used to depict and analyze the results. A
deeper analysis is provided for the most interesting
and/or remarkable results. CONCLUSION. Analysis of the
results shows that, although the RIA term was coined in
2002, the first RIA-related research appeared in 2004.
From 2007 there was a significant increase in research
activity, peaking in 2009 and decreasing to pre-2009
levels afterwards. All development phases are covered
in the identified research, with emphasis on ``design''
(33\%) and ``implementation'' (29\%). The majority of
research proposes a ``method'' (44\%), followed by
``model'' (22\%), ``methodology'' (18\%), and ``tools''
(16\%); no publications in the category ``metrics''
were found. The preponderant research topic is
``models, methods and methodologies'' (23\%) and, to a
lesser extent, ``usability and accessibility'' and
``user interface'' (11\% each). On the other hand, the
topic ``localization, internationalization and
multilinguality'' received no attention at all, and
topics such as ``deep Web'' (under 1\%), ``business
processing'', ``usage analysis'', ``data management'',
``quality and metrics'' (all under 2\%), ``semantics'',
and ``performance'' (slightly above 2\%) received very
little attention. Finally, there is a large majority of
``solution proposals'' (66\%), few ``evaluation
research'' (14\%), and even fewer ``validation'' (6\%),
although the latter have been increasing in recent
years.",
acknowledgement = ack-nhfb,
articleno =    "18",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Dincturk:2014:MBA,
author =       "Mustafa Emre Dincturk and Guy-Vincent Jourdan and
Gregor V. Bochmann and Iosif Viorel Onut",
title =        "A Model-Based Approach for Crawling {Rich Internet
Applications}",
journal =      j-TWEB,
volume =       "8",
number =       "3",
pages =        "19:1--19:??",
month =        jun,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2626371",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "New Web technologies, like AJAX, result in more
responsive and interactive Web applications, sometimes
called Rich Internet Applications (RIAs). Crawling
techniques developed for traditional Web applications
are not sufficient for crawling RIAs. The inability to
crawl RIAs is a problem that needs to be addressed for
at least making RIAs searchable and testable. We
present a new methodology, called ``model-based
crawling'', that can be used as a basis to design
efficient crawling strategies for RIAs. We illustrate
model-based crawling with a sample strategy, called the
``hypercube strategy''. The performances of our
model-based crawling strategies are compared against
existing standard crawling strategies, including
breadth-first, depth-first, and a greedy strategy.
Experimental results show that our model-based crawling
approach is significantly more efficient than these
standard strategies.",
acknowledgement = ack-nhfb,
articleno =    "19",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Dragut:2014:MQR,
author =       "Eduard C. Dragut and Bhaskar Dasgupta and Brian P.
Beirne and Ali Neyestani and Badr Atassi and Clement Yu
and Weiyi Meng",
title =        "Merging Query Results From Local Search Engines for
Georeferenced Objects",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "20:1--20:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2656344",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "The emergence of numerous online sources about local
services presents a need for more automatic yet
accurate data integration techniques. Local services
are georeferenced objects and can be queried by their
locations on a map, for instance, neighborhoods.
Typical local service queries (e.g., ``French
Restaurant in The Loop'') include not only information
about ``what'' (``French Restaurant'') a user is
searching for (such as cuisine) but also ``where''
information, such as neighborhood (``The Loop''). In
translation, result merging and ranking. Most local
search engines provide a (hierarchical) organization of
(large) cities into neighborhoods. A neighborhood in
one local search engine may correspond to sets of
neighborhoods in other local search engines. These make
the query translation challenging. To provide an
local search engines, we need to combine the results
into a single list of results. Our contributions
include: (1) An integration algorithm for
neighborhoods. (2) A very effective business listing
resolution algorithm. (3) A ranking algorithm that
takes into consideration the user criteria, user
ratings and rankings. We have created a prototype
system, Yumi, over local search engines in the
restaurant domain. The restaurant domain is a
representative case study for the local services. We
conducted a comprehensive experimental study to
evaluate Yumi. A prototype version of Yumi is available
online.",
acknowledgement = ack-nhfb,
articleno =    "20",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Chen:2014:CCU,
author =       "Xihui Chen and Jun Pang and Ran Xue",
title =        "Constructing and Comparing User Mobility Profiles",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "21:1--21:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2637483",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "to location-based applications has made it possible to
users' mobility profiles subsequently brings benefits
back to location-based applications. For instance, in
on-line social networks, friends can be recommended not
only based on the similarity between their registered
information, for instance, hobbies and professions but
also referring to the similarity between their mobility
construct and compare users' mobility profiles. First,
we improve and apply frequent sequential pattern mining
technologies to extract the sequences of places that a
user frequently visits and use them to model his
mobility profile. Second, we present a new method to
calculate the similarity between two users using their
mobility profiles. More specifically, we identify the
weaknesses of a similarity metric in the literature,
and propose a new one which not only fixes the
weaknesses but also provides more precise and effective
similarity estimation. Third, we consider the semantics
of spatio-temporal information contained in user
mobility profiles and add them into the calculation of
user similarity. It enables us to measure users'
similarity from different perspectives. Two specific
location semantics and temporal semantics. Last, we
validate our approach by applying it to two real-life
datasets collected by Microsoft Research Asia and
Yonsei University, respectively. The results show that
our approach outperforms the existing works from
several aspects.",
acknowledgement = ack-nhfb,
articleno =    "21",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Vural:2014:SFW,
author =       "A. Gural Vural and B. Barla Cambazoglu and Pinar
Karagoz",
title =        "Sentiment-Focused {Web} Crawling",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "22:1--22:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2644821",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Sentiments and opinions expressed in Web pages towards
objects, entities, and products constitute an important
portion of the textual content available in the Web. In
the last decade, the analysis of such content has
gained importance due to its high potential for
monetization. Despite the vast interest in sentiment
analysis, somewhat surprisingly, the discovery of
sentimental or opinionated Web content is mostly
ignored. This work aims to fill this gap and addresses
the problem of quickly discovering and fetching the
sentimental content present in the Web. To this end, we
design a sentiment-focused Web crawling framework. In
particular, we propose different sentiment-focused Web
crawling strategies that prioritize discovered URLs
based on their predicted sentiment scores. Through
simulations, these strategies are shown to achieve
considerable performance improvement over
general-purpose Web crawling strategies in discovery of
sentimental Web content.",
acknowledgement = ack-nhfb,
articleno =    "22",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Kyusakov:2014:EFE,
author =       "Rumen Kyusakov and Pablo Pu{\~n}al Pereira and Jens
Eliasson and Jerker Delsing",
title =        "{EXIP}: a Framework for Embedded {Web} Development",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "23:1--23:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2665068",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Developing and deploying Web applications on networked
embedded devices is often seen as a way to reduce the
development cost and time to market for new target
platforms. However, the size of the messages and the
processing requirements of today's Web protocols, such
as HTTP and XML, are challenging for the most
resource-constrained class of devices that could also
benefit from Web connectivity. New Web protocols using
binary representations have been proposed for
addressing this issue. Constrained Application Protocol
(CoAP) reduces the bandwidth and processing
requirements compared to HTTP while preserving the core
concepts of the Web architecture. Similarly, Efficient
XML Interchange (EXI) format has been standardized for
reducing the size and processing time for XML
structured information. Nevertheless, the adoption of
these technologies is lagging behind due to lack of
support from Web browsers and current Web development
presents the design and implementation techniques for
the EXIP framework for embedded Web development. The
framework consists of a highly efficient EXI processor,
a tool for EXI data binding based on templates, and a
CoAP/EXI/XHTML Web page engine. A prototype
implementation of the EXI processor is herein presented
and evaluated. It can be applied to Web browsers or
thin server platforms using XHTML and Web services for
supporting human-machine interactions in the Internet
(1) theoretical and practical evaluation of the use of
binary protocols for embedded Web programming; (2) a
novel method for generation of EXI grammars based on
XML Schema definitions; (3) an algorithm for grammar
concatenation that produces normalized EXI grammars
directly, and hence reduces the number of iterations
during grammar generation; (4) an algorithm for
efficient representation of possible deviations from
the XML schema.",
acknowledgement = ack-nhfb,
articleno =    "23",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Thomas:2014:UID,
author =       "Paul Thomas",
title =        "Using Interaction Data to Explain Difficulty
Navigating Online",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "24:1--24:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2656343",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "A user's behaviour when browsing a Web site contains
clues to that user's experience. It is possible to
record some of these behaviours automatically, and
extract signals that indicate a user is having trouble
finding information. This allows for Web site analytics
based on user experiences, not just page impressions. A
series of experiments identified user browsing
behaviours-such as time taken and amount of scrolling
up a page-which predict navigation difficulty and which
can be recorded with minimal or no changes to existing
sites or browsers. In turn, patterns of page views
correlate with these signals and these patterns can
help Web authors understand where and why their sites
are hard to navigate. A new software tool, ``LATTE,''
automates this analysis and makes it available to Web
authors in the context of the site itself.",
acknowledgement = ack-nhfb,
articleno =    "24",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{White:2014:CBO,
author =       "Ryen W. White and Ahmed Hassan",
title =        "Content Bias in Online Health Search",
journal =      j-TWEB,
volume =       "8",
number =       "4",
pages =        "25:1--25:??",
month =        oct,
year =         "2014",
CODEN =        "????",
DOI =          "http://dx.doi.org/10.1145/2663355",
ISSN =         "1559-1131 (print), 1559-114X (electronic)",
ISSN-L =       "1559-1131",
bibdate =      "Thu Nov 6 16:08:07 MST 2014",
bibsource =    "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/tweb.bib",
abstract =     "Search engines help people answer consequential
questions. Biases in retrieved and indexed content
(e.g., skew toward erroneous outcomes that represent
deviations from reality), coupled with searchers'
biases in how they examine and interpret search
results, can lead people to incorrect answers. In this
article, we seek to better understand biases in search
and retrieval, and in particular those affecting the
accuracy of content in search results, including the
search engine index, features used for ranking, and the
formulation of search queries. Focusing on the
important domain of online health search, this research
broadens previous work on biases in search to examine
the role of search systems in contributing to biases.
To assess bias, we focus on questions about medical
interventions and employ reliable ground truth data
from authoritative medical sources. In the course of
our study, we utilize large-scale log analysis using
data from a popular Web search engine, deep probes of
result lists on that search engine, and crowdsourced
human judgments of search result captions and landing
pages. Our findings reveal bias in results, amplifying
searchers' existing biases that appear evident in their
search activity. We also highlight significant bias in
indexed content and show that specific ranking signals
and specific query terms support bias. Both of these
can degrade result accuracy and increase skewness in
search results. Our analysis has implications for bias
mitigation strategies in online search systems, and we
offer recommendations for search providers based on our
findings.",
acknowledgement = ack-nhfb,
articleno =    "25",
fjournal =     "ACM Transactions on the Web (TWEB)",
journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}