%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.48", %%% date = "19 August 2023", %%% time = "07:34:53 MDT", %%% filename = "tweb.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% FAX = "+1 801 581 4148", %%% URL = "http://www.math.utah.edu/~beebe", %%% checksum = "05550 16115 89716 829220", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "ACM Transactions on the Web (TWEB); %%% bibliography; TWEB", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on the Web (TWEB) (CODEN %%% ????, ISSN 1559-1131), covering all journal %%% issues from 2007 -- date. %%% %%% At version 1.48, the COMPLETE journal %%% coverage looked like this: %%% %%% 2007 ( 14) 2013 ( 30) 2019 ( 20) %%% 2008 ( 22) 2014 ( 19) 2020 ( 19) %%% 2009 ( 14) 2015 ( 20) 2021 ( 20) %%% 2010 ( 17) 2016 ( 24) 2022 ( 21) %%% 2011 ( 21) 2017 ( 25) 2023 ( 24) %%% 2012 ( 18) 2018 ( 28) %%% %%% Article: 356 %%% %%% Total entries: 356 %%% %%% The journal Web page can be found at: %%% %%% http://www.acm.org/pubs/tweb.html %%% %%% The journal table of contents page is at: %%% %%% http://www.acm.org/tweb/ %%% http://portal.acm.org/browse_dl.cfm?idx=J1062 %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. %%% %%% The initial draft was extracted from the ACM %%% Web pages. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. Article reviews have been %%% omitted, until their copyright status has %%% been clarified. 
%%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility." %%% } %%% ==================================================================== @Preamble{"\input bibnames.sty" # "\def \TM {${}^{\sc TM}$}" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, FAX: +1 801 581 4148, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|http://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-TWEB = "ACM Transactions on the Web (TWEB)"} %%% ==================================================================== %%% Bibliography entries: @Article{Ashman:2007:I, author = "Helen Ashman and Arun Iyengar", title = "Introduction", journal = j-TWEB, volume = "1", number = "1", pages = "1:1--1:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1232722.1232723", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:16:53 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Urgaonkar:2007:AMM, author = "Bhuvan Urgaonkar and Giovanni Pacifici and Prashant Shenoy and Mike Spreitzer and Asser Tantawi", title = "Analytic modeling of multitier {Internet} applications", journal = j-TWEB, volume = "1", number = "1", pages = "2:1--2:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1232722.1232724", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:16:53 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Since many Internet applications employ a multitier architecture, in this article, we focus on the problem of analytically modeling the behavior of such applications. 
We present a model based on a network of queues where the queues represent different tiers of the application. Our model is sufficiently general to capture (i) the behavior of tiers with significantly different performance characteristics and (ii) application idiosyncrasies such as session-based workloads, tier replication, load imbalances across replicas, and caching at intermediate tiers. We validate our model using real multitier applications running on a Linux server cluster. Our experiments indicate that our model faithfully captures the performance of these applications for a number of workloads and configurations. Furthermore, our model successfully handles a comprehensive range of resource utilization---from 0 to near saturation for the CPU---for two separate tiers. For a variety of scenarios, including those with caching at one of the application tiers, the average response times predicted by our model were within the 95\% confidence intervals of the observed average response times. Our experiments also demonstrate the utility of the model for dynamic capacity provisioning, performance prediction, bottleneck identification, and session policing. In one scenario, where the request arrival rate increased from less than 1500 to nearly 4200 requests/minute, a dynamic provisioning technique employing our model was able to maintain response time targets by increasing the capacity of two of the tiers by factors of 2 and 3.5, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "analytical model; dynamic provisioning; hosting platform; Internet service; mean-value analysis; performance prediction; policing; queuing theory; session; tier", } @Article{Jansen:2007:CES, author = "Bernard J. 
We used 108 e-commerce queries and 8,256 retrieved links
Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "e-commerce searching; sponsored links; sponsored results; sponsored search; Web search engines; Web searching", } @Article{Church:2007:MIA, author = "Karen Church and Barry Smyth and Paul Cotter and Keith Bradley", title = "Mobile information access: a study of emerging search behavior on the mobile {Internet}", journal = j-TWEB, volume = "1", number = "1", pages = "4:1--4:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1232722.1232726", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:16:53 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It is likely that mobile phones will soon come to rival more traditional devices as the primary platform for information access. Consequently, it is important to understand the emerging information access behavior of mobile Internet (MI) users especially in relation to their use of mobile handsets for information browsing and query-based search. In this article, we describe the results of a recent analysis of the MI habits of more than 600,000 European MI users, with a particular emphasis on the emerging interest in mobile search. We consider a range of factors including whether there are key differences between browsing and search behavior on the MI compared to the Web. We highlight how browsing continues to dominate mobile information access, but go on to show how search is becoming an increasingly popular information access alternative especially in relation to certain types of mobile handsets and information needs. Moreover, we show that sessions involving search tend to be longer and more data-rich than those that do not involve search. 
We also look at the type of queries used during mobile search and the way that these queries tend to be modified during the course of a mobile search session. Finally we examine the overlap among mobile search queries and the different topics mobile users are interested in.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "log analysis; Mobile browsing; mobile Internet; mobile search", } @Article{Leskovec:2007:DVM, author = "Jure Leskovec and Lada A. Adamic and Bernardo A. Huberman", title = "The dynamics of viral marketing", journal = j-TWEB, volume = "1", number = "1", pages = "5:1--5:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1232722.1232727", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:16:53 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We present an analysis of a person-to-person recommendation network, consisting of 4 million people who made 16 million recommendations on half a million products. We observe the propagation of recommendations and the cascade sizes, which we explain by a simple stochastic model. We analyze how user behavior varies within user communities defined by a recommendation network. Product purchases follow a `long tail' where a significant share of purchases belongs to rarely sold items. We establish how the recommendation network grows over time and how effective it is from the viewpoint of the sender and receiver of the recommendations. While on average recommendations are not very effective at inducing purchases and do not spread very far, we present a model that successfully identifies communities, product, and pricing categories for which viral marketing seems to be very effective.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "e-commerce; long tail; network analysis; recommender systems; Viral marketing; word-of-mouth", } @Article{Yu:2007:EAW, author = "Tao Yu and Yue Zhang and Kwei-Jay Lin", title = "Efficient algorithms for {Web} services selection with end-to-end {QoS} constraints", journal = j-TWEB, volume = "1", number = "1", pages = "6:1--6:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1232722.1232728", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:16:53 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Service-Oriented Architecture (SOA) provides a flexible framework for service composition. Using standard-based protocols (such as SOAP and WSDL), composite services can be constructed by integrating atomic services developed independently. Algorithms are needed to select service components with various QoS levels according to some application-dependent performance requirements. We design a broker-based architecture to facilitate the selection of QoS-based services. The objective of service selection is to maximize an application-specific utility function under the end-to-end QoS constraints. The problem is modeled in two ways: the combinatorial model and the graph model. The combinatorial model defines the problem as a multidimension multichoice 0-1 knapsack problem (MMKP). The graph model defines the problem as a multiconstraint optimal path (MCOP) problem. Efficient heuristic algorithms for service processes of different composition structures are presented in this article and their performances are studied by simulations. We also compare the pros and cons between the two models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "End-to-end QoS; service composition; service oriented architecture (SOA); service selection; Web services", } @Article{Dubinko:2007:VTT, author = "Micah Dubinko and Ravi Kumar and Joseph Magnani and Jasmine Novak and Prabhakar Raghavan and Andrew Tomkins", title = "Visualizing tags over time", journal = j-TWEB, volume = "1", number = "2", pages = "7:1--7:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1255438.1255439", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We consider the problem of visualizing the evolution of tags within the Flickr (flickr.com) online image sharing community. Any user of the Flickr service may append a tag to any photo in the system. Over the past year, users have on average added over a million tags each week. Understanding the evolution of these tags over time is therefore a challenging task. We present a new approach based on a characterization of the most interesting tags associated with a sliding interval of time. An animation provided via Flash in a Web browser allows the user to observe and interact with the interesting tags as they evolve over time.\par New algorithms and data structures are required to support the efficient generation of this visualization. We combine a novel solution to an interval covering problem with extensions to previous work on score aggregation in order to create an efficient backend system capable of producing visualizations at arbitrary scales on this large dataset in real time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Flickr; interval covering; tags; temporal evolution; visualization", } @Article{Mohan:2007:SPC, author = "Bharath Kumar Mohan and Benjamin J. Keller and Naren Ramakrishnan", title = "Scouts, promoters, and connectors: {The} roles of ratings in nearest-neighbor collaborative filtering", journal = j-TWEB, volume = "1", number = "2", pages = "8:1--8:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1255438.1255440", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recommender systems aggregate individual user ratings into predictions of products or services that might interest visitors. The quality of this aggregation process crucially affects the user experience and hence the effectiveness of recommenders in e-commerce. We present a characterization of nearest-neighbor collaborative filtering that allows us to disaggregate global recommender performance measures into contributions made by each individual rating. In particular, we formulate three roles--- {\em scouts}, {\em promoters}, and {\em connectors\/} ---that capture how users receive recommendations, how items get recommended, and how ratings of these two types are themselves connected, respectively. These roles find direct uses in improving recommendations for users, in better targeting of items and, most importantly, in helping monitor the health of the system as a whole. For instance, they can be used to track the evolution of neighborhoods, to identify rating subspaces that do not contribute (or contribute negatively) to system performance, to enumerate users who are in danger of leaving, and to assess the susceptibility of the system to attacks such as shilling. 
We argue that the three rating roles presented here provide broad primitives to manage a recommender system and its community.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "collaborative filtering; connectors; neighborhoods; promoters; Recommender systems; scouts; user-based and item-based algorithms", } @Article{Rogers:2007:EPB, author = "Alex Rogers and Esther David and Nicholas R. Jennings and Jeremy Schiff", title = "The effects of proxy bidding and minimum bid increments within {eBay} auctions", journal = j-TWEB, volume = "1", number = "2", pages = "9:1--9:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1255438.1255441", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We present a mathematical model of the eBay auction protocol and perform a detailed analysis of the effects that the eBay proxy bidding system and the minimum bid increment have on the auction properties. We first consider the revenue of the auction, and we show analytically that when two bidders with independent private valuations use the eBay proxy bidding system there exists an optimal value for the minimum bid increment at which the auctioneer's revenue is maximized. We then consider the sequential way in which bids are placed within the auction, and we show analytically that independent of assumptions regarding the bidders' valuation distribution or bidding strategy the number of visible bids placed is related to the logarithm of the number of potential bidders. 
Thus, in many cases, it is only a minority of the potential bidders that are able to submit bids and are visible in the auction bid history (despite the fact that the other hidden bidders are still effectively competing for the item). Furthermore, we show through simulation that the minimum bid increment also introduces an inefficiency to the auction, whereby a bidder who enters the auction late may find that its valuation is insufficient to allow them to advance the current bid by the minimum bid increment despite them actually having the highest valuation for the item. Finally, we use these results to consider appropriate strategies for bidders within real world eBay auctions. We show that while last-minute bidding (sniping) is an effective strategy against bidders engaging in incremental bidding (and against those with common values), in general, delaying bidding is disadvantageous even if delayed bids are sure to be received before the auction closes. Thus, when several bidders submit last-minute bids, we show that rather than seeking to bid as late as possible, a bidder should try to be the first sniper to bid (i.e., it should ``snipe before the snipers'').", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "bid increment; electronic commerce; Online auctions; proxy bidding; sniping", } @Article{Serrano:2007:DSW, author = "M. 
{\'A}ngeles Serrano and Ana Maguitman and Mari{\'a}n Bogu{\~n}{\'a} and Santo Fortunato and Alessandro Vespignani", title = "Decoding the structure of the {WWW}: a comparative analysis of {Web} crawls", journal = j-TWEB, volume = "1", number = "2", pages = "10:1--10:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1255438.1255442", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The understanding of the immense and intricate topological structure of the World Wide Web (WWW) is a major scientific and technological challenge. This has been recently tackled by characterizing the properties of its representative graphs, in which vertices and directed edges are identified with Web pages and hyperlinks, respectively. Data gathered in large-scale crawls have been analyzed by several groups resulting in a general picture of the WWW that encompasses many of the complex properties typical of rapidly evolving networks. In this article, we report a detailed statistical analysis of the topological properties of four different WWW graphs obtained with different crawlers. We find that, despite the very large size of the samples, the statistical measures characterizing these graphs differ quantitatively, and in some cases qualitatively, depending on the domain analyzed and the crawl used for gathering the data. This spurs the issue of the presence of sampling biases and structural differences of Web crawls that might induce properties not representative of the actual global underlying graph. In short, the stability of the widely accepted statistical description of the Web is called into question. 
In order to provide a more accurate characterization of the Web graph, we study statistical measures beyond the degree distribution, such as degree-degree correlation functions or the statistics of reciprocal connections. The latter appears to enclose the relevant correlations of the WWW graph and carry most of the topological information of the Web. The analysis of this quantity is also of major interest in relation to the navigability and searchability of the Web.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "crawler biases; statistical analysis; Web graph structure; Web measurement", } @Article{Reis:2007:BVD, author = "Charles Reis and John Dunagan and Helen J. Wang and Opher Dubrovsky and Saher Esmeir", title = "{BrowserShield}: {Vulnerability}-driven filtering of dynamic {HTML}", journal = j-TWEB, volume = "1", number = "3", pages = "11:1--11:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1281480.1281481", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:14 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Vulnerability-driven filtering of network data can offer a fast and easy-to-deploy alternative or intermediary to software patching, as exemplified in Shield [Wang et al. 2004]. In this article, we take Shield's vision to a new domain, inspecting and cleansing not just static content, but also dynamic content. The dynamic content we target is the dynamic HTML in Web pages, which have become a popular vector for attacks. The key challenge in filtering dynamic HTML is that it is undecidable to statically determine whether an embedded script will exploit the browser at runtime. 
We avoid this undecidability problem by rewriting web pages and any embedded scripts into safe equivalents, inserting checks so that the filtering is done at runtime. The rewritten pages contain logic for recursively applying runtime checks to dynamically generated or modified web content, based on known vulnerabilities. We have built and evaluated {\em BrowserShield}, a general framework that performs this dynamic instrumentation of embedded scripts, and that admits policies for customized runtime actions like vulnerability-driven filtering. We also explore other applications on top of BrowserShield.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "code rewriting; JavaScript; vulnerability; Web browser", } @Article{Sun:2007:MDW, author = "Zan Sun and Jalal Mahmud and I. V. Ramakrishnan and Saikat Mukherjee", title = "Model-directed {Web} transactions under constrained modalities", journal = j-TWEB, volume = "1", number = "3", pages = "12:1--12:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1281480.1281482", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:14 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online transactions (e.g., buying a book on the Web) typically involve a number of steps spanning several pages. Conducting such transactions under constrained interaction modalities as exemplified by small screen handhelds or interactive speech interfaces --- the primary mode of communication for visually impaired individuals --- is a strenuous, fatigue-inducing activity. But usually one needs to browse only a small fragment of a Web page to perform a transactional step such as a form fillout, selecting an item from a search results list, and so on. 
We exploit this observation to develop an automata-based process model that delivers only the ``relevant'' page fragments at each transactional step, thereby reducing information overload on such narrow interaction bandwidths. We realize this model by coupling techniques from content analysis of Web documents, automata learning and statistical classification. The process model and associated techniques have been incorporated into Guide-O, a prototype system that facilitates online transactions using speech/keyboard interface (Guide-O-Speech), or with limited-display size handhelds (Guide-O-Mobile). Performance of Guide-O and its user experience are reported.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "assistive device; content adaption; Web transaction", } @Article{Sharman:2007:CAD, author = "Raj Sharman and Shiva Shankar Ramanna and Ram Ramesh and Ram Gopal", title = "Cache architecture for on-demand streaming on the {Web}", journal = j-TWEB, volume = "1", number = "3", pages = "13:1--13:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1281480.1281483", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:14 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "On-demand streaming from a remote server through best-effort Internet poses several challenges because of network losses and variable delays. The primary technique used to improve the quality of distributed content service is replication. In the context of the Internet, Web caching is the traditional mechanism that is used. 
In this article we develop a new staged delivery model for a distributed architecture in which video is streamed from remote servers to edge caches where the video is buffered and then streamed to the client through a last-mile connection. The model uses a novel revolving indexed cache buffer management mechanism at the edge cache and employs selective retransmissions of lost packets between the remote and edge cache for a best-effort recovery of the losses. The new Web cache buffer management scheme includes a dynamic adjustment of cache buffer parameters based on network conditions. In addition, performance of buffer management and retransmission policies at the edge cache is modeled and assessed using a probabilistic analysis of the streaming process as well as system simulations. The influence of different endogenous control parameters on the quality of stream received by the client is studied. Calibration curves on the QoS metrics for different network conditions have been obtained using simulations. Edge cache management can be done using these calibration curves. ISPs can make use of calibration curves to set the values of the endogenous control parameters for specific QoS in real-time streaming operations based on network conditions. A methodology to benchmark transmission characteristics using real-time traffic data is developed to enable effective decision making on edge cache buffer allocation and management strategies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "buffering; edge cache; on-demand streaming; quality of service; selective retransmissions; Web caching", } @Article{Zdun:2007:MPD, author = "Uwe Zdun and Carsten Hentrich and Schahram Dustdar", title = "Modeling process-driven and service-oriented architectures using patterns and pattern primitives", journal = j-TWEB, volume = "1", number = "3", pages = "14:1--14:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1281480.1281484", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:14 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Service-oriented architectures are increasingly used in the context of business processes. However, the proven practices for process-oriented integration of services are not well documented yet. In addition, modeling approaches for the integration of processes and services are neither mature nor do they exactly reflect the proven practices. In this article, we propose a pattern language for process-oriented integration of services to describe the proven practices. Our main contribution is a modeling concept based on pattern primitives for these patterns. A pattern primitive is a fundamental, precisely specified modeling element that represents a pattern. We present a catalog of pattern primitives that are precisely modeled using OCL constraints and map these primitives to the patterns in the pattern language of process-oriented integration of services. We also present a model validation tool that we have developed to support modeling the process-oriented integration of services, and an industrial case study in which we have applied our results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "middleware; Service-oriented architecture; software patterns", } @Article{Najork:2008:ISS, author = "Marc Najork and Brian D. Davison", title = "Introduction to special section on adversarial issues in {Web} search", journal = j-TWEB, volume = "2", number = "1", pages = "1:1--1:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326562", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Becchetti:2008:LAW, author = "Luca Becchetti and Carlos Castillo and Debora Donato and Ricardo Baeza-Yates and Stefano Leonardi", title = "Link analysis for {Web} spam detection", journal = j-TWEB, volume = "2", number = "1", pages = "2:1--2:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326563", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose link-based techniques for automatic detection of Web spam, a term referring to pages which use deceptive techniques to obtain undeservedly high scores in search engines. The use of Web spam is widespread and difficult to solve, mostly due to the large size of the Web which means that, in practice, many algorithms are infeasible.\par We perform a statistical analysis of a large collection of Web pages. 
In particular, we compute statistics of the links in the vicinity of every Web page applying rank propagation and probabilistic counting over the entire Web graph in a scalable way. These statistical features are used to build Web spam classifiers which only consider the link structure of the Web, regardless of page contents. We then present a study of the performance of each of the classifiers alone, as well as their combined performance, by testing them over a large collection of Web link spam. After tenfold cross-validation, our best classifiers have a performance comparable to that of state-of-the-art spam classifiers that use content attributes, but are orthogonal to content-based methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "adversarial information retrieval; Link analysis", } @Article{Urvoy:2008:TWS, author = "Tanguy Urvoy and Emmanuel Chauveau and Pascal Filoche and Thomas Lavergne", title = "Tracking {Web} spam with {HTML} style similarities", journal = j-TWEB, volume = "2", number = "1", pages = "3:1--3:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326564", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Automatically generated content is ubiquitous in the web: dynamic sites built using the three-tier paradigm are good examples (e.g., commercial sites, blogs and other sites edited using web authoring software), as well as less legitimate spamdexing attempts (e.g., link farms, faked directories).\par Those pages built using the same generating method (template or script) share a common ``look and feel'' that is not easily detected by common text classification methods, but is more related to stylometry.\par In 
this work we study and compare several HTML style similarity measures based on both textual and extra-textual features in HTML source code. We also propose a flexible algorithm to cluster a large collection of documents according to these measures. Since the proposed algorithm is based on locality sensitive hashing (LSH), we first review this technique.\par We then describe how to use the HTML style similarity clusters to pinpoint dubious pages and enhance the quality of spam classifiers. We present an evaluation of our algorithm on the WEBSPAM-UK2006 dataset.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Clustering; document similarity; search engine spam; stylometry; templates identification", } @Article{Lin:2008:DST, author = "Yu-Ru Lin and Hari Sundaram and Yun Chi and Junichi Tatemura and Belle L. Tseng", title = "Detecting splogs via temporal dynamics using self-similarity analysis", journal = j-TWEB, volume = "2", number = "1", pages = "4:1--4:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326565", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article addresses the problem of spam blog (splog) detection using temporal and structural regularity of content, post time and links. Splogs are undesirable blogs meant to attract search engine traffic, used solely for promoting affiliate sites. Blogs represent popular online media, and splogs not only degrade the quality of search engine results, but also waste network resources. 
The splog detection problem is made difficult due to the lack of stable content descriptors.\par We have developed a new technique for detecting splogs, based on the observation that a blog is a dynamic, growing sequence of entries (or posts) rather than a collection of individual pages. In our approach, splogs are recognized by their temporal characteristics and content. There are three key ideas in our splog detection framework. (a) We represent the blog temporal dynamics using self-similarity matrices defined on the histogram intersection similarity measure of the time, content, and link attributes of posts, to investigate the temporal changes of the post sequence. (b) We study the blog temporal characteristics using a visual representation derived from the self-similarity measures. The visual signature reveals correlation between attributes and posts, depending on the type of blogs (normal blogs and splogs). (c) We propose two types of novel temporal features to capture the splog temporal characteristics. In our splog detector, these novel features are combined with content based features. We extract a content based feature vector from blog home pages as well as from different parts of the blog. The dimensionality of the feature vector is reduced by Fisher linear discriminant analysis. We have tested an SVM-based splog detector using proposed features on real world datasets, with appreciable results (90\% accuracy).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Blogs; regularity; self-similarity; spam; splog detection; temporal dynamics; topology", } @Article{Weinreich:2008:QAE, author = "Harald Weinreich and Hartmut Obendorf and Eelco Herder and Matthias Mayer", title = "Not quite the average: an empirical study of {Web} use", journal = j-TWEB, volume = "2", number = "1", pages = "5:1--5:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326566", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In the past decade, the World Wide Web has been subject to dramatic changes. Web sites have evolved from static information resources to dynamic and interactive applications that are used for a broad scope of activities on a daily basis. To examine the consequences of these changes on user behavior, we conducted a long-term client-side Web usage study with twenty-five participants. This report presents results of this study and compares the user behavior with previous long-term browser usage studies, which range in age from seven to thirteen years. Based on the empirical data and the interview results, various implications for the interface design of browsers and Web sites are discussed.\par A major finding is the decreasing prominence of backtracking in Web navigation. This can largely be attributed to the increasing importance of dynamic, service-oriented Web sites. Users do not navigate on these sites searching for information, but rather interact with an online application to complete certain tasks. Furthermore, the usage of multiple windows and tabs has partly replaced back button usage, posing new challenges for user orientation and backtracking. 
We found that Web browsing is a rapid activity even for pages with substantial content, which calls for page designs that allow for cursory reading. Click maps provide additional information on how users interact with the Web on page level. Finally, substantial differences were observed between users, and characteristic usage patterns for different types of Web sites emphasize the need for more adaptive and customizable Web browsers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "browser interfaces; hypertext; Navigation; usability; user study; Web; web browsing; web design; WWW", } @Article{Yu:2008:FWS, author = "Qi Yu and Athman Bouguettaya", title = "Framework for {Web} service query algebra and optimization", journal = j-TWEB, volume = "2", number = "1", pages = "6:1--6:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326567", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We present a query algebra that supports optimized access of Web services through service-oriented queries. The service query algebra is defined based on a formal service model that provides a high-level abstraction of Web services across an application domain. The algebra defines a set of algebraic operators. Algebraic service queries can be formulated using these operators. This allows users to query their desired services based on both functionality and quality. We provide the implementation of each algebraic operator. This enables the generation of Service Execution Plans (SEPs) that can be used by users to directly access services. 
We present an optimization algorithm by extending the Dynamic Programming (DP) approach to efficiently select the SEPs with the best user-desired quality. The experimental study validates the proposed algorithm by demonstrating significant performance improvement compared with the traditional DP approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "query optimization; service oriented computing; service query; Web service", } @Article{Aleman-Meza:2008:SSA, author = "Boanerges Aleman-Meza and Meenakshi Nagarajan and Li Ding and Amit Sheth and I. Budak Arpinar and Anupam Joshi and Tim Finin", title = "Scalable semantic analytics on social networks for addressing the problem of conflict of interest detection", journal = j-TWEB, volume = "2", number = "1", pages = "7:1--7:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326568", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this article, we demonstrate the applicability of semantic techniques for detection of Conflict of Interest (COI). We explain the common challenges involved in building scalable Semantic Web applications, in particular those addressing connecting-the-dots problems. We describe in detail the challenges involved in two important aspects on building Semantic Web applications, namely, data acquisition and entity disambiguation (or reference reconciliation). We extend upon our previous work where we integrated the collaborative network of a subset of DBLP researchers with persons in a Friend-of-a-Friend social network (FOAF). 
Our method finds the connections between people, measures collaboration strength, and includes heuristics that use friendship/affiliation information to provide an estimate of potential COI in a peer-review scenario. Evaluations are presented by measuring what could have been the COI between accepted papers in various conference tracks and their respective program committee members. The experimental results demonstrate that scalability can be achieved by using a dataset of over 3 million entities (all bibliographic data from DBLP and a large collection of FOAF documents).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "conflict of interest; data fusion; DBLP; entity disambiguation; ontologies; peer review process; RDF; semantic analytics; semantic associations; Semantic Web; social networks; swetoDblp", } @Article{Gmach:2008:AQS, author = "Daniel Gmach and Stefan Krompass and Andreas Scholz and Martin Wimmer and Alfons Kemper", title = "Adaptive quality of service management for enterprise services", journal = j-TWEB, volume = "2", number = "1", pages = "8:1--8:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326569", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In the past, enterprise resource planning systems were designed as monolithic software systems running on centralized mainframes. Today, these systems are (re-)designed as a repository of enterprise services that are distributed throughout the available computing infrastructure. 
These service oriented architectures (SOAs) require advanced automatic and adaptive management concepts in order to achieve a high quality of service level in terms of, for example, availability, responsiveness, and throughput. The adaptive management has to allocate service instances to computing resources, adapt the resource allocation to unforeseen load fluctuations, and intelligently schedule individual requests to guarantee negotiated service level agreements (SLAs). Our AutoGlobe platform provides such a comprehensive adaptive service management comprising\par --- static service-to-server allocation based on automatically detected service utilization patterns,\par --- adaptive service management based on a fuzzy controller that remedies exceptional situations by automatically initiating, for example, service migration, service replication (scale-out), and\par --- adaptive scheduling of individual service requests that prioritizes requests depending on the current degree of service level conformance.\par All three complementary control components are described in detail, and their effectiveness is analyzed by means of realistic business application scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "fuzzy controller; Quality of service; workload characterization", } @Article{Yang:2008:DGN, author = "Bo Yang and Jiming Liu", title = "Discovering global network communities based on local centralities", journal = j-TWEB, volume = "2", number = "1", pages = "9:1--9:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1326561.1326570", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:25 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "One of the central problems in studying and understanding complex networks, such as online social networks or World Wide Web, is to discover hidden, either physically (e.g., interactions or hyperlinks) or logically (e.g., profiles or semantics) well-defined topological structures. From a practical point of view, a good example of such structures would be so-called network communities. Earlier studies have introduced various formulations as well as methods for the problem of identifying or extracting communities. While each of them has pros and cons as far as the effectiveness and efficiency are concerned, almost none of them has explicitly dealt with the potential relationship between the global topological property of a network and the local property of individual nodes. In order to study this problem, this paper presents a new algorithm, called ICS, which aims to discover natural network communities by inferring from the local information of nodes inherently hidden in networks based on a new centrality, that is, clustering centrality, which is a generalization of eigenvector centrality. As compared with existing methods, our method runs efficiently with a good clustering performance. 
Additionally, it is insensitive to its built-in parameters and prior knowledge.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "centrality; community mining; Complex network; graph theory; World Wide Web", } @Article{Dustdar:2008:ISI, author = "Schahram Dustdar and Bernd J. Kr{\"a}mer", title = "Introduction to special issue on service oriented computing {(SOC)}", journal = j-TWEB, volume = "2", number = "2", pages = "10:1--10:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1346237.1346238", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:47 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Belhajjame:2008:AAW, author = "Khalid Belhajjame and Suzanne M. Embury and Norman W. Paton and Robert Stevens and Carole A. Goble", title = "Automatic annotation of {Web} services based on workflow definitions", journal = j-TWEB, volume = "2", number = "2", pages = "11:1--11:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1346237.1346239", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:47 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Semantic annotations of web services can support the effective and efficient discovery of services, and guide their composition into workflows. At present, however, the practical utility of such annotations is limited by the small number of service annotations available for general use.
Manual annotation of services is a time consuming and thus expensive task, so some means are required by which services can be automatically (or semi-automatically) annotated. In this paper, we show how information can be inferred about the semantics of operation parameters based on their connections to other (annotated) operation parameters within tried-and-tested workflows. Because the data links in the workflows do not necessarily contain every possible connection of compatible parameters, we can infer only constraints on the semantics of parameters. We show that despite their imprecise nature these so-called {\em loose annotations\/} are still of value in supporting the manual annotation task, inspecting workflows and discovering services. We also show that derived annotations for already annotated parameters are useful. By comparing existing and newly derived annotations of operation parameters, we can support the detection of errors in existing annotations, the ontology used for annotation and in workflows. The derivation mechanism has been implemented, and its practical applicability for inferring new annotations has been established through an experimental evaluation. The usefulness of the derived annotations is also demonstrated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "automatic annotation; ontologies; semantic annotations; Semantic web services; workflows", } @Article{Elgedawy:2008:CAH, author = "Islam Elgedawy and Zahir Tari and James A. 
Thom", title = "Correctness-aware high-level functional matching approaches for semantic {Web} services", journal = j-TWEB, volume = "2", number = "2", pages = "12:1--12:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1346237.1346240", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:47 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Service matching approaches trade precision for recall, creating the need for users to choose the correct services, which obviously is a major obstacle for automating the service discovery and aggregation processes. Our approach to overcome this problem, is to eliminate the appearance of false positives by returning only the correct services. As different users have different semantics for what is correct, we argue that the correctness of the matching results must be determined according to the achievement of users' goals: that only services achieving users' goals are considered correct. To determine such correctness, we argue that the matching process should be based primarily on the high-level functional specifications (namely goals, achievement contexts, and external behaviors). In this article, we propose models, data structures, algorithms, and theorems required to correctly match such specifications. We propose a model called $ G^+ $, to capture such specifications, for both services and users, in a machine-understandable format. We propose a data structure, called a Concepts Substitutability Graph (CSG), to capture the substitution semantics of application domain concepts in a context-based manner, in order to determine the semantic-preserving mapping transformations required to match different {\em G\/}$^+$ models. We also propose a behavior matching approach that is able to match states in an m-to-n manner, such that behavior models with different numbers of state transitions can be matched. 
Finally, we show how services are matched and aggregated according to their $ G^+ $ models. Results of supporting experiments demonstrate the advantages of the proposed service matching approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "High-level functional matching; semantic Web services; service aggregation", } @Article{Ryu:2008:SDE, author = "Seung Hwan Ryu and Fabio Casati and Halvard Skogsrud and Boualem Benatallah and R{\'e}gis Saint-Paul", title = "Supporting the dynamic evolution of {Web} service protocols in service-oriented architectures", journal = j-TWEB, volume = "2", number = "2", pages = "13:1--13:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1346237.1346241", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:47 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In service-oriented architectures, everything is a service and everyone is a service provider. Web services (or simply services) are loosely coupled software components that are published, discovered, and invoked across the Web. As the use of Web service grows, in order to correctly interact with them, it is important to understand the business protocols that provide clients with the information on how to interact with services. In dynamic Web service environments, service providers need to constantly adapt their business protocols for reflecting the restrictions and requirements proposed by new applications, new business strategies, and new laws, or for fixing problems found in the protocol definition. However, the effective management of such a protocol evolution raises critical problems: one of the most critical issues is how to handle instances running under the old protocol when it has been changed. 
Simple solutions, such as aborting them or allowing them to continue to run according to the old protocol, can be considered, but they are inapplicable for many reasons (for example, the loss of work already done and the critical nature of work). In this article, we present a framework that supports service managers in managing the business protocol evolution by providing several features, such as a variety of protocol change impact analyses automatically determining which ongoing instances can be migrated to the new version of protocol, and data mining techniques inferring interaction patterns used for classifying ongoing instances migrateable to the new protocol. To support the protocol evolution process, we have also developed database-backed GUI tools on top of our existing system. The proposed approach and tools can help service managers in managing the evolution of ongoing instances when the business protocols of services with which they are interacting have changed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Business protocols; change impact analysis; decision trees; dynamic evolution; ongoing instances; Web services", } @Article{Schafer:2008:EFA, author = "Michael Sch{\"a}fer and Peter Dolog and Wolfgang Nejdl", title = "An environment for flexible advanced compensations of {Web} service transactions", journal = j-TWEB, volume = "2", number = "2", pages = "14:1--14:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1346237.1346242", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:47 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Business to business integration has recently been performed by employing Web service environments. 
Moreover, such environments are being provided by major players on the technology markets. Those environments are based on open specifications for transaction coordination. When a failure in such an environment occurs, a compensation can be initiated to recover from the failure. However, current environments have only limited capabilities for compensations, and are usually based on backward recovery. In this article, we introduce an environment to deal with advanced compensations based on forward recovery principles. We extend the existing Web service transaction coordination architecture and infrastructure in order to support flexible compensation operations. We use a contract-based approach, which allows the specification of permitted compensations at runtime. We introduce {\em abstract service\/} and {\em adapter\/} components, which allow us to separate the compensation logic from the coordination logic. In this way, we can easily plug in or plug out different compensation strategies based on a specification language defined on top of basic compensation activities and complex compensation types. Experiments with our approach and environment show that such an approach to compensation is feasible and beneficial. Additionally, we introduce a cost-benefit model to evaluate the proposed environment based on net value analysis. The evaluation shows in which circumstances the environment is economical.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "compensations; forward-recovery; transactions; Web services", } @Article{Srivatsa:2008:MAL, author = "Mudhakar Srivatsa and Arun Iyengar and Jian Yin and Ling Liu", title = "Mitigating application-level denial of service attacks on {Web} servers: a client-transparent approach", journal = j-TWEB, volume = "2", number = "3", pages = "15:1--15:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1377488.1377489", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:58 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recently, we have seen increasing numbers of denial of service (DoS) attacks against online services and Web applications either for extortion reasons or for impairing and even disabling the competition. These DoS attacks have increasingly targeted the application level. Application-level DoS attacks emulate the same request syntax and network-level traffic characteristics as those of legitimate clients, thereby making the attacks much harder to detect and counter. Moreover, such attacks often target bottleneck resources such as disk bandwidth, database bandwidth, and CPU resources. In this article, we propose handling DoS attacks by using a twofold mechanism. First, we perform admission control to limit the number of concurrent clients served by the online service. Admission control is based on port hiding that renders the online service invisible to unauthorized clients by hiding the port number on which the service accepts incoming requests. Second, we perform congestion control on admitted clients to allocate more resources to good clients. 
Congestion control is achieved by adaptively setting a client's priority level in response to the client's requests in a way that can incorporate application-level semantics. We present a detailed evaluation of the proposed solution using two sample applications: Apache HTTPD and the TPCW benchmark (running on Apache Tomcat and IBM DB2). Our experiments show that the proposed solution incurs low performance overhead and is resilient to DoS attacks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "client transparency; DoS Attacks; game theory; Web servers", } @Article{White:2008:LPD, author = "Ryen W. White and Mikhail Bilenko and Silviu Cucerzan", title = "Leveraging popular destinations to enhance {Web} search interaction", journal = j-TWEB, volume = "2", number = "3", pages = "16:1--16:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1377488.1377490", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:58 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article presents a novel Web search interaction feature that for a given query provides links to Web sites frequently visited by other users with similar information needs. These popular destinations complement traditional search results, allowing direct navigation to authoritative resources for the query topic. Destinations are identified using the history of the search and browsing behavior of many users over an extended time period, and their collective behavior provides a basis for computing source authority. They are drawn from the end of users' postquery browse trails where users may cease searching once they find relevant information. 
We describe a user study that compared the suggestion of destinations with the previously proposed suggestion of related queries as well as with traditional, unaided Web search. Results show that search enhanced by query suggestions outperforms other systems in terms of subject perceptions and search effectiveness for fact-finding search tasks. However, search enhanced by destination suggestions performs best for exploratory tasks with its best performance obtained from mining past user behavior at query-level granularity. We discuss the implications of these and other findings from our study for the design of search systems that utilize user behavior, in particular, user browse trails and popular destinations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "enhanced Web search; search destinations; User studies", } @Article{Andreolini:2008:MFS, author = "Mauro Andreolini and Sara Casolari and Michele Colajanni", title = "Models and framework for supporting runtime decisions in {Web-based} systems", journal = j-TWEB, volume = "2", number = "3", pages = "17:1--17:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1377488.1377491", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:17:58 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Efficient management of distributed Web-based systems requires several mechanisms that decide on request dispatching, load balance, admission control, request redirection. The algorithms behind these mechanisms typically make fast decisions on the basis of the load conditions of the system resources. 
The architecture complexity and workloads characterizing most Web-based services make it extremely difficult to deduce a representative view of a resource load from collected measures that show extreme variability even at different time scales. Hence, any decision based on instantaneous or average views of the system load may lead to useless or even wrong actions. As an alternative, we propose a two-phase strategy that first aims to obtain a representative view of the load trend from measured system values and then applies this representation to support runtime decision systems. We consider two classical problems behind decisions: how to detect significant and nontransient load changes of a system resource and how to predict its future load behavior. The two-phase strategy is based on stochastic functions that are characterized by a computational complexity that is compatible with runtime decisions. We describe, test, and tune the two-phase strategy by considering as a first example a multitier Web-based system that is subject to different classes of realistic and synthetic workloads. Also, we integrate the proposed strategy into a framework that we validate by applying it to support runtime decisions in a cluster Web system and in a locally distributed Network Intrusion Detection System.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "distributed systems; load change detection; load prediction; load representation; World Wide Web", } @Article{Amitay:2008:ISI, author = "Einat Amitay and Andrei Broder", title = "Introduction to special issue on query log analysis: {Technology} and ethics", journal = j-TWEB, volume = "2", number = "4", pages = "18:1--18:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409220.1409221", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cooper:2008:SQL, author = "Alissa Cooper", title = "A survey of query log privacy-enhancing techniques from a policy perspective", journal = j-TWEB, volume = "2", number = "4", pages = "19:1--19:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409220.1409222", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As popular search engines face the sometimes conflicting interests of protecting privacy while retaining query logs for a variety of uses, numerous technical measures have been suggested to both enhance privacy and preserve at least a portion of the utility of query logs. This article seeks to assess seven of these techniques against three sets of criteria: (1) how well the technique protects privacy, (2) how well the technique preserves the utility of the query logs, and (3) how well the technique might be implemented as a user control. 
A user control is defined as a mechanism that allows individual Internet users to choose to have the technique applied to their own query logs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "log; policy; Privacy; query; search", } @Article{Baeza-Yates:2008:DTO, author = "Ricardo Baeza-Yates and Aristides Gionis and Flavio P. Junqueira and Vanessa Murdock and Vassilis Plachouras and Fabrizio Silvestri", title = "Design trade-offs for search engine caching", journal = j-TWEB, volume = "2", number = "4", pages = "20:1--20:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409220.1409223", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this article we study the trade-offs in designing efficient caching systems for Web search engines. We explore the impact of different approaches, such as static vs. dynamic caching, and caching query results vs. caching posting lists. Using a query log spanning a whole year, we explore the limitations of caching and we demonstrate that caching posting lists can achieve higher hit rates than caching query answers. We propose a new algorithm for static caching of posting lists, which outperforms previous methods. We also study the problem of finding the optimal way to split the static cache between answers and posting lists. Finally, we measure how the changes in the query log influence the effectiveness of static caching, given our observation that the distribution of the queries changes slowly over time. 
Our results and observations are applicable to different levels of the data-access hierarchy, for instance, for a memory/disk layer or a broker/remote server layer.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Caching; query logs; Web search", } @Article{Richardson:2008:LAW, author = "Matthew Richardson", title = "Learning about the world through long-term query logs", journal = j-TWEB, volume = "2", number = "4", pages = "21:1--21:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409220.1409224", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this article, we demonstrate the value of long-term query logs. Most work on query logs to date considers only short-term (within-session) query information. In contrast, we show that long-term query logs can be used to learn about the world we live in. There are many applications of this that lead not only to improving the search engine for its users, but also potentially to advances in other disciplines such as medicine, sociology, economics, and more. In this article, we will show how long-term query logs can be used for these purposes, and that their potential is severely reduced if the logs are limited to short time horizons. We show that query effects are long-lasting, provide valuable information, and might be used to automatically make medical discoveries, build concept hierarchies, and generally learn about the sociological behavior of users. We believe these applications are only the beginning of what can be done with the information contained in long-term query logs, and see this work as a step toward unlocking their potential.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "data mining; knowledge discovery; query logs; user behavior", } @Article{Koutrika:2008:CST, author = "Georgia Koutrika and Frans Adjie Effendi and Zolt{\'a}n Gy{\"o}ngyi and Paul Heymann and Hector Garcia-Molina", title = "Combating spam in tagging systems: an evaluation", journal = j-TWEB, volume = "2", number = "4", pages = "22:1--22:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409220.1409225", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:06 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Tagging systems allow users to interactively annotate a pool of shared resources using descriptive strings called {\em tags}. Tags are used to guide users to interesting resources and help them build communities that share their expertise and resources. As tagging systems are gaining in popularity, they become more susceptible to {\em tag spam\/}: misleading tags that are generated in order to increase the visibility of some resources or simply to confuse users. Our goal is to understand this problem better. In particular, we are interested in answers to questions such as: How many malicious users can a tagging system tolerate before results significantly degrade? What types of tagging systems are more vulnerable to malicious attacks? What would be the effort and the impact of employing a trusted moderator to find bad postings? Can a system automatically protect itself from spam, for instance, by exploiting user tag patterns? In a quest for answers to these questions, we introduce a framework for modeling tagging systems and user tagging behavior. We also describe a method for ranking documents matching a tag based on taggers' reliability.
Using our framework, we study the behavior of existing approaches under malicious attacks and the impact of a moderator and our ranking method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "bookmarking systems; tag spam; Tagging; tagging models", } @Article{Rattenbury:2009:MEP, author = "Tye Rattenbury and Mor Naaman", title = "Methods for extracting place semantics from {Flickr} tags", journal = j-TWEB, volume = "3", number = "1", pages = "1:1--1:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1462148.1462149", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:15 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We describe an approach for extracting semantics for tags, unstructured text-labels assigned to resources on the Web, based on each tag's usage patterns. In particular, we focus on the problem of extracting place semantics for tags that are assigned to photos on Flickr, a popular-photo sharing Web site that supports location (latitude/longitude) metadata for photos. We propose the adaptation of two baseline methods, inspired by well-known burst-analysis techniques, for the task; we also describe two novel methods, TagMaps and scale-structure identification. We evaluate the methods on a subset of Flickr data. We show that our scale-structure identification method outperforms existing techniques and that a hybrid approach generates further improvements (achieving 85\% precision at 81\% recall). The approach and methods described in this work can be used in other domains such as geo-annotated Web pages, where text terms can be extracted and associated with usage patterns.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "places; semantics; Tagging systems; tags", } @Article{Jackson:2009:PBD, author = "Collin Jackson and Adam Barth and Andrew Bortz and Weidong Shao and Dan Boneh", title = "Protecting browsers from {DNS} rebinding attacks", journal = j-TWEB, volume = "3", number = "1", pages = "2:1--2:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1462148.1462150", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:15 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "DNS rebinding attacks subvert the same-origin policy of browsers, converting them into open network proxies. Using DNS rebinding, an attacker can circumvent organizational and personal firewalls, send spam email, and defraud pay-per-click advertisers. We evaluate the cost effectiveness of mounting DNS rebinding attacks, finding that an attacker requires less than \$100 to hijack 100,000 IP addresses. We analyze defenses to DNS rebinding attacks, including improvements to the classic ``DNS pinning,'' and recommend changes to browser plug-ins, firewalls, and Web servers. Our defenses have been adopted by plug-in vendors and by a number of open-source firewall implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "click fraud; DNS; firewall; Same-origin policy; spam", } @Article{Bar-Yossef:2009:DCD, author = "Ziv Bar-Yossef and Idit Keidar and Uri Schonfeld", title = "Do not crawl in the {DUST}: {Different URLs with Similar Text}", journal = j-TWEB, volume = "3", number = "1", pages = "3:1--3:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1462148.1462151", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:15 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We consider the problem of DUST: Different URLs with Similar Text. Such duplicate URLs are prevalent in Web sites, as Web server software often uses aliases and redirections, and dynamically generates the same page from various different URL requests. We present a novel algorithm, {\em DustBuster}, for uncovering DUST; that is, for discovering rules that transform a given URL to others that are likely to have similar content. DustBuster mines DUST effectively from previous crawl logs or Web server logs, {\em without\/} examining page contents. Verifying these rules via sampling requires fetching few actual Web pages. Search engines can benefit from information about DUST to increase the effectiveness of crawling, reduce indexing overhead, and improve the quality of popularity statistics such as PageRank.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "antialiasing; crawling; duplicate detection; Search engines; URL normalization", } @Article{Xiao:2009:BSD, author = "Xiangye Xiao and Qiong Luo and Dan Hong and Hongbo Fu and Xing Xie and Wei-Ying Ma", title = "Browsing on small displays by transforming {Web} pages into hierarchically structured subpages", journal = j-TWEB, volume = "3", number = "1", pages = "4:1--4:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1462148.1462152", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:15 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a new Web page transformation method to facilitate Web browsing on handheld devices such as Personal Digital Assistants (PDAs). In our approach, an original Web page that does not fit on the screen is transformed into a set of subpages, each of which fits on the screen. This transformation is done through slicing the original page into page blocks iteratively, with several factors considered. These factors include the size of the screen, the size of each page block, the number of blocks in each transformed page, the depth of the tree hierarchy that the transformed pages form, as well as the semantic coherence between blocks. We call the tree hierarchy of the transformed pages an SP-tree. In an SP-tree, an internal node consists of a textually enhanced thumbnail image with hyperlinks, and a leaf node is a block extracted from a subpage of the original Web page. We adaptively adjust the fanout and the height of the SP-tree so that each thumbnail image is clear enough for users to read, while at the same time, the number of clicks needed to reach a leaf page is few. 
Through this transformation algorithm, we preserve the contextual information in the original Web page and reduce scrolling. We have implemented this transformation module on a proxy server and have conducted usability studies on its performance. Our system achieved a shorter task completion time compared with that of transformations from the Opera browser in nine of ten tasks. The average improvement on familiar pages was 44\%. The average improvement on unfamiliar pages was 37\%. Subjective responses were positive.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Proxy; slicing tree; small displays; thumbnails; Web browsing; Web page adaptation", } @Article{Gabrilovich:2009:CSQ, author = "Evgeniy Gabrilovich and Andrei Broder and Marcus Fontoura and Amruta Joshi and Vanja Josifovski and Lance Riedel and Tong Zhang", title = "Classifying search queries using the {Web} as a source of knowledge", journal = j-TWEB, volume = "3", number = "2", pages = "5:1--5:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513877", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a methodology for building a robust query classification system that can identify thousands of query classes, while dealing in real time with the query volume of a commercial Web search engine. We use a pseudo relevance feedback technique: given a query, we determine its topic by classifying the Web search results retrieved by the query. Motivated by the needs of search advertising, we primarily focus on rare queries, which are the hardest from the point of view of machine learning, yet in aggregate account for a considerable fraction of search engine traffic. 
Empirical evaluation confirms that our methodology yields a considerably higher classification accuracy than previously reported. We believe that the proposed methodology will lead to better matching of online ads to rare queries and overall to a better user experience.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Pseudo relevance feedback; query classification; Web search", } @Article{Reay:2009:LSE, author = "Ian Reay and Scott Dick and James Miller", title = "A large-scale empirical study of {P3P} privacy policies: {Stated} actions vs. legal obligations", journal = j-TWEB, volume = "3", number = "2", pages = "6:1--6:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513878", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Numerous studies over the past ten years have shown that concern for personal privacy is a major impediment to the growth of e-commerce. These concerns are so serious that most if not all consumer watchdog groups have called for some form of privacy protection for Internet users. In response, many nations around the world, including all European Union nations, Canada, Japan, and Australia, have enacted national legislation establishing mandatory safeguards for personal privacy. However, recent evidence indicates that Web sites might not be adhering to the requirements of this legislation. The goal of this study is to examine the posted privacy policies of Web sites, and compare these statements to the legal mandates under which the Web sites operate. 
We harvested all available P3P (Platform for Privacy Preferences Protocol) documents from the 100,000 most popular Web sites (over 3,000 full policies, and another 3,000 compact policies). This allows us to undertake an automated analysis of adherence to legal mandates on Web sites that most impact the average Internet user. Our findings show that Web sites generally do not even claim to follow all the privacy-protection mandates in their legal jurisdiction (we do not examine actual practice, only posted policies). Furthermore, this general statement appears to be true for every jurisdiction with privacy laws and any significant number of P3P policies, including European Union nations, Canada, Australia, and Web sites in the USA Safe Harbor program.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "electronic commerce; legislation and enforcement; P3P; Privacy protection", } @Article{Dourisboure:2009:ECD, author = "Yon Dourisboure and Filippo Geraci and Marco Pellegrini", title = "Extraction and classification of dense implicit communities in the {Web} graph", journal = j-TWEB, volume = "3", number = "2", pages = "7:1--7:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1513876.1513879", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Fri Apr 24 18:18:23 MDT 2009", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The World Wide Web (WWW) is rapidly becoming important for society as a medium for sharing data, information, and services, and there is a growing interest in tools for understanding collective behavior and emerging phenomena in the WWW. In this article we focus on the problem of searching and classifying {\em communities\/} in the Web. Loosely speaking a community is a group of pages related to a common interest. 
More formally, communities have been associated in the computer science literature with the existence of a locally dense subgraph of the Web graph (where Web pages are nodes and hyperlinks are arcs of the Web graph). The core of our contribution is a new scalable algorithm for finding relatively dense subgraphs in massive graphs. We apply our algorithm on Web graphs built on three publicly available large crawls of the Web (with raw sizes up to 120M nodes and 1G arcs). The effectiveness of our algorithm in finding dense subgraphs is demonstrated experimentally by embedding artificial communities in the Web graph and counting how many of these are blindly found. Effectiveness increases with the size and density of the communities: it is close to 100\% for communities of thirty nodes or more (even at low density). It is still about 80\% even for communities of twenty nodes with density over 50\% of the arcs present. At the lower extremes the algorithm catches 35\% of dense communities made of ten nodes. We also develop some sufficient conditions for the detection of a community under some local graph models and not-too-restrictive hypotheses. We complete our {\em Community Watch\/} system by clustering the communities found in the Web graph into homogeneous groups by topic and labeling each group by representative keywords.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "communities; detection of dense subgraph; Web graph", } @Article{Lee:2009:ISB, author = "Hsin-Tsang Lee and Derek Leonard and Xiaoming Wang and Dmitri Loguinov", title = "{IRLbot}: {Scaling} to 6 billion pages and beyond", journal = j-TWEB, volume = "3", number = "3", pages = "8:1--8:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541823", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article shares our experience in designing a Web crawler that can download billions of pages using a single-server implementation and models its performance. We first show that current crawling algorithms cannot effectively cope with the sheer volume of URLs generated in large crawls, highly branching spam, legitimate multimillion-page blog sites, and infinite loops created by server-side scripts. We then offer a set of techniques for dealing with these issues and test their performance in an implementation we call IRLbot. In our recent experiment that lasted 41 days, IRLbot running on a single server successfully crawled 6.3 billion valid HTML pages (7.6 billion connection requests) and sustained an average download rate of 319 mb/s (1,789 pages/s). Unlike our prior experiments with algorithms proposed in related work, this version of IRLbot did not experience any bottlenecks and successfully handled content from over 117 million hosts, parsed out 394 billion links, and discovered a subset of the Web graph with 41 billion unique nodes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "crawling; IRLbot; large scale", } @Article{Tappenden:2009:CDS, author = "Andrew F. Tappenden and James Miller", title = "Cookies: a deployment study and the testing implications", journal = j-TWEB, volume = "3", number = "3", pages = "9:1--9:??", month = jun, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1541822.1541824", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Mar 16 09:28:38 MDT 2010", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The results of an extensive investigation of cookie deployment amongst 100,000 Internet sites are presented. Cookie deployment is found to be approaching universal levels and hence there exists an associated need for relevant Web and software engineering processes, specifically testing strategies which actively consider cookies. The semi-automated investigation demonstrates that over two-thirds of the sites studied deploy cookies. The investigation specifically examines the use of first-party, third-party, sessional, and persistent cookies within Web-based applications, identifying the presence of a P3P policy and dynamic Web technologies as major predictors of cookie usage. The results are juxtaposed with the lack of testing strategies present in the literature. A number of real-world examples, including two case studies are presented, further accentuating the need for comprehensive testing strategies for Web-based applications. The use of antirandom test case generation is explored with respect to the testing issues discussed. Finally, a number of seeding vectors are presented, providing a basis for testing cookies within Web-based applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web",
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Cookies; Internet browser; software testing; Web engineering; Web technologies",
}

@Article{Comuzzi:2009:FQB,
  author =       "Marco Comuzzi and Barbara Pernici",
  title =        "A framework for {QoS}-based {Web} service contracting",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1541822.1541825",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The extensive adoption of Web service-based applications in dynamic business scenarios, such as on-demand computing or highly reconfigurable virtual enterprises, advocates for methods and tools for the management of Web service nonfunctional aspects, such as Quality of Service (QoS). Concerning contracts on Web service QoS, the literature has mostly focused on the contract definition and on mechanisms for contract enactment, such as the monitoring of the satisfaction of negotiated QoS guarantees. In this context, this article proposes a framework for the automation of the Web service contract specification and establishment. An extensible model for defining both domain-dependent and domain-independent Web service QoS dimensions and a method for the automation of the contract establishment phase are proposed. We describe a matchmaking algorithm for the ranking of functionally equivalent services, which orders services on the basis of their ability to fulfill the service requestor requirements, while maintaining the price below a specified budget. We also provide an algorithm for the configuration of the negotiable part of the QoS Service-Level Agreement (SLA), which is used to configure the agreement with the top-ranked service identified in the matchmaking phase. Experimental results show that, in a utility theory perspective, the contract establishment phase leads to efficient outcomes. We envision two advanced application scenarios for the Web service contracting framework proposed in this article. First, it can be used to enhance Web services self-healing properties in reaction to QoS-related service failures; second, it can be exploited in process optimization for the online reconfiguration of candidate Web services QoS SLAs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "matchmaking; negotiation; QoS; service selection; SLA; Web service",
}

@Article{Pilioura:2009:UPD,
  author =       "Thomi Pilioura and Aphrodite Tsalgatidou",
  title =        "Unified publication and discovery of semantic {Web} services",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1541822.1541826",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The challenge of publishing and discovering Web services has recently received lots of attention. Various solutions to this problem have been proposed which, apart from their offered advantages, suffer the following disadvantages: (i) most of them are syntactic-based, leading to poor precision and recall, (ii) they are not scalable to large numbers of services, and (iii) they are incompatible, thus yielding in cumbersome service publication and discovery. This article presents the principles, the functionality, and the design of PYRAMID-S which addresses these disadvantages by providing a scalable framework for unified publication and discovery of semantically enhanced services over heterogeneous registries. PYRAMID-S uses a hybrid peer-to-peer topology to organize Web service registries based on domains. In such a topology, each Registry retains its autonomy, meaning that it can use the publication and discovery mechanisms as well as the ontology of its choice. The viability of this approach is demonstrated through the implementation and experimental analysis of a prototype.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "evaluation; PYRAMID-S; scalable; semantic Web services; unified; Web service discovery; Web service publication",
}

@Article{Golbeck:2009:TNP,
  author =       "Jennifer Golbeck",
  title =        "Trust and nuanced profile similarity in online social networks",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1594173.1594174",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Web",
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Singh:2009:SSO,
  author =       "Aameek Singh and Mudhakar Srivatsa and Ling Liu",
  title =        "Search-as-a-service: {Outsourced} search over outsourced storage",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1594173.1594175",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Robu:2009:ECS,
  author =       "Valentin Robu and Harry Halpin and Hana Shepherd",
  title =        "Emergence of consensus and shared vocabularies in collaborative tagging systems",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1594173.1594176",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zheng:2010:UTM,
  author =       "Yu Zheng and Yukun Chen and Quannan Li and Xing Xie and Wei-Ying Ma",
  title =        "Understanding transportation modes based on {GPS} data for {Web} applications",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1658373.1658374",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Li:2010:DSO,
  author =       "Guoli Li and Vinod Muthusamy and Hans-Arno Jacobsen",
  title =        "A distributed service-oriented architecture for business process execution",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1658373.1658375",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Montali:2010:DSV,
  author =       "Marco Montali and Maja Pesic and Wil M. P. van der Aalst and Federico Chesani and Paola Mello and Sergio Storari",
  title =        "Declarative specification and verification of service choreographies",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1658373.1658376",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Almishari:2010:APD,
  author =       "Mishari Almishari and Xiaowei Yang",
  title =        "Ads-portal domains: {Identification} and measurements",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1734200.1734201",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "An ads-portal domain refers to a Web domain that shows only advertisements, served by a third-party advertisement syndication service, in the form of ads listing. We develop a machine-learning-based classifier to identify ads-portal domains, which has 96\% accuracy. We use this classifier to measure the prevalence of ads-portal domains on the Internet. Surprisingly, 28.3/25\% of the (two-level) {\tt *.com} /{\tt *.net} web domains are ads-portal domains. Also, 41/39.8\% of {\tt *.com} /{\tt *.net} ads-portal domains are typos of well-known domains, also known as typo-squatting domains. In addition, we use the classifier along with DNS trace files to estimate how often Internet users visit ads-portal domains.
It turns out that $ \approx 5 \% $ of the two-level {\tt *.com}, {\tt *.net}, {\tt *.org}, {\tt *.biz} and {\tt *.info} web domains on the traces are ads-portal domains and $ \approx 50 \% $ of these accessed ads-portal domains are typos. These numbers show that ads-portal domains and typo-squatting ads-portal domains are prevalent on the Internet and successful in attracting many visits. Our classifier represents a step towards better categorizing the web documents. It can also be helpful to search engines ranking algorithms, helpful in identifying web spams that redirects to ads-portal domains, and used to discourage access to typo-squatting ads-portal domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Ads-portal; advertisement syndication; data mining; parked domain; parking service; Web characterization",
}

@Article{Jurca:2010:RIB,
  author =       "Radu Jurca and Florent Garcin and Arjun Talwar and Boi Faltings",
  title =        "Reporting incentives and biases in online review forums",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1734200.1734202",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Online reviews have become increasingly popular as a way to judge the quality of various products and services. However, recent work demonstrates that the absence of reporting incentives leads to a biased set of reviews that may not reflect the true quality. In this paper, we investigate underlying factors that influence users when reporting feedback. In particular, we study both reporting incentives and reporting biases observed in a widely used review forum, the Tripadvisor Web site. We consider three sources of information: first, the numerical ratings left by the user for different aspects of quality; second, the textual comment accompanying a review; third, the patterns in the time sequence of reports. We first show that groups of users who discuss a certain feature at length are more likely to agree in their ratings. Second, we show that users are more motivated to give feedback when they perceive a greater risk involved in a transaction. Third, a user's rating partly reflects the difference between true quality and prior expectation of quality, as inferred from previous reviews. We finally observe that because of these biases, when averaging review scores there are strong differences between the mean and the median. We speculate that the median may be a better way to summarize the ratings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Online reviews; reputation mechanisms",
}

@Article{Vlachos:2010:ODB,
  author =       "Michail Vlachos and Suleyman S. Kozat and Philip S. Yu",
  title =        "Optimal distance bounds for fast search on compressed time-series query logs",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1734200.1734203",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Consider a database of time-series, where each datapoint in the series records the total number of users who asked for a specific query at an Internet search engine. Storage and analysis of such logs can be very beneficial for a search company from multiple perspectives. First, from a data organization perspective, because query Weblogs capture important trends and statistics, they can help enhance and optimize the search experience (keyword recommendation, discovery of news events). Second, Weblog data can provide an important polling mechanism for the microeconomic aspects of a search engine, since they can facilitate and promote the advertising facet of the search engine (understand what users request and when they request it).\par Due to the sheer amount of time-series Weblogs, manipulation of the logs in a compressed form is an impeding necessity for fast data processing and compact storage requirements. Here, we explicate how to compute the lower and upper distance bounds on the time-series logs when working directly on their compressed form. Optimal distance estimation means tighter bounds, leading to better candidate selection/elimination and ultimately faster search performance. Our derivation of the optimal distance bounds is based on the careful analysis of the problem using optimization principles. The experimental evaluation suggests a clear performance advantage of the proposed method, compared to previous compression/search techniques. The presented method results in a 10--30\% improvement on distance estimations, which in turn leads to 25--80\% improvement on the search performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Web",
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Fraternali:2010:ERI,
  author =       "Piero Fraternali and Sara Comai and Alessandro Bozzon and Giovanni Toffetti Carughi",
  title =        "Engineering rich {Internet} applications with a model-driven approach",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "7:1--7:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1734200.1734204",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Rich Internet Applications (RIAs) have introduced powerful novel functionalities into the Web architecture, borrowed from client-server and desktop applications. The resulting platforms allow designers to improve the user's experience, by exploiting client-side data and computation, bidirectional client-server communication, synchronous and asynchronous events, and rich interface widgets. However, the rapid evolution of RIA technologies challenges the Model-Driven Development methodologies that have been successfully applied in the past decade to traditional Web solutions. This paper illustrates an evolutionary approach for incorporating a wealth of RIA features into an existing Web engineering methodology and notation. The experience demonstrates that it is possible to model RIA application requirements at a high-level using a platform-independent notation, and generate the client-side and server-side code automatically. The resulting approach is evaluated in terms of expressive power, ease of use, and implementability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "information interfaces and presentation; information storage and retrieval; model-driven development; Rich Internet applications; Web engineering",
}

@Article{Xiao:2010:LSS,
  author =       "Xiangye Xiao and Qiong Luo and Zhisheng Li and Xing Xie and Wei-Ying Ma",
  title =        "A large-scale study on map search logs",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1806916.1806917",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Map search engines, such as Google Maps, Yahoo! Maps, and Microsoft Live Maps, allow users to explicitly specify a target geographic location, either in keywords or on the map, and to search businesses, people, and other information of that location. In this article, we report a first study on a million-entry map search log. We identify three key attributes of a map search record --- the keyword query, the target location and the user location, and examine the characteristics of these three dimensions separately as well as the associations between them. Comparing our results with those previously reported on logs of general search engines and mobile search engines, including those for geographic queries, we discover the following unique features of map search: (1) People use longer queries and modify queries more frequently in a session than in general search and mobile search; People view fewer result pages per query than in general search; (2) The popular query topics in map search are different from those in general search and mobile search; (3) The target locations in a session change within 50 kilometers for almost 80\% of the sessions; (4) Queries, search target locations and user locations (both at the city level) all follow the power law distribution; (5) One third of queries are issued for target locations within 50 kilometers from the user locations; (6) The distribution of a query over target locations appears to follow the geographic location of the queried entity.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "local search; log analysis; Map search; query categorization; search interface; user behavior",
}

@Article{Malak:2010:MWQ,
  author =       "Ghazwa Malak and Houari Sahraoui and Linda Badri and Mourad Badri",
  title =        "Modeling {Web} quality using a probabilistic approach: an empirical validation",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1806916.1806918",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Web-based applications are software systems that continuously evolve to meet users' needs and to adapt to new technologies. Assuring their quality is then a difficult, but essential task.
In fact, a large number of factors can affect their quality. Considering these factors and their interaction involves managing uncertainty and subjectivity inherent to this kind of applications. In this article, we present a probabilistic approach for building Web quality models and the associated assessment method. The proposed approach is based on Bayesian Networks. A model is built following a four-step process consisting in collecting quality characteristics, refining them, building a model structure, and deriving the model parameters.\par The feasibility of the approach is illustrated on the important quality characteristic of {\em Navigability design}. To validate the produced model, we conducted an experimental study with 20 subjects and 40 web pages. The results obtained show that the scores given by the used model are strongly correlated with navigability as perceived and experienced by the users.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Bayesian networks; Navigability design; probabilistic approach; quality evaluation; Web applications",
}

@Article{Poblete:2010:PPQ,
  author =       "Barbara Poblete and Myra Spiliopoulou and Ricardo Baeza-Yates",
  title =        "Privacy-preserving query log mining for business confidentiality protection",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "10:1--10:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1806916.1806919",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We introduce the concern of confidentiality protection of business information for the publication of search engine query logs and derived data. We study business confidentiality, as the protection of nonpublic data from institutions, such as companies and people in the public eye. In particular, we relate this concern to the involuntary exposure of confidential Web site information, and we transfer this problem into the field of privacy-preserving data mining. We characterize the possible adversaries interested in disclosing Web site confidential data and the attack strategies that they could use. These attacks are based on different vulnerabilities found in query log for which we present several anonymization heuristics to prevent them. We perform an experimental evaluation to estimate the remaining utility of the log after the application of our anonymization techniques. Our experimental results show that a query log can be anonymized against these specific attacks while retaining a significant volume of useful data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Privacy preservation; queries; query log publication; Web sites",
}

@Article{Consens:2010:EXW,
  author =       "Mariano P. Consens and Ren{\'e}e J. Miller and Flavio Rizzolo and Alejandro A. Vaisman",
  title =        "Exploring {XML} {Web} collections with {DescribeX}",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1806916.1806920",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "As Web applications mature and evolve, the nature of the semistructured data that drives these applications also changes. An important trend is the need for increased flexibility in the structure of Web documents. Hence, applications cannot rely solely on schemas to provide the complex knowledge needed to visualize, use, query and manage documents. Even when XML Web documents are valid with regard to a schema, the actual structure of such documents may exhibit significant variations across collections for several reasons: the schema may be very lax (e.g., RSS feeds), the schema may be large and different subsets of it may be used in different documents (e.g., industry standards like UBL), or open content models may allow arbitrary schemas to be mixed (e.g., RSS extensions like those used for podcasting). For these reasons, many applications that incorporate XPath queries to process a large Web document collection require an understanding of the actual structure present in the collection, and not just the schema.\par To support modern Web applications, we introduce DescribeX, a powerful framework that is capable of describing complex XML summaries of Web collections. DescribeX supports the construction of heterogeneous summaries that can be declaratively defined and refined by means of axis path regular expression (AxPREs). AxPREs provide the flexibility necessary for declaratively defining complex mappings between instance nodes (in the documents) and summary nodes. These mappings are capable of expressing order and cardinality, among other properties, which can significantly help in the understanding of the structure of large collections of XML documents and enhance the performance of Web applications over these collections. DescribeX captures most summary proposals in the literature by providing (for the first time) a common declarative definition for them. Experimental results demonstrate the scalability of DescribeX summary operations (summary creation, as well as refinement and stabilization, two key enablers for tailoring summaries) on multi-gigabyte Web collections.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Web",
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Semistructured data; structural summaries; XML; XPath",
}

@Article{Adams:2010:DLS,
  author =       "Brett Adams and Dinh Phung and Svetha Venkatesh",
  title =        "Discovery of latent subcommunities in a blog's readership",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1806916.1806921",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The blogosphere has grown to be a mainstream forum of social interaction as well as a commercially attractive source of information and influence. Tools are needed to better understand how communities that adhere to individual blogs are constituted in order to facilitate new personal, socially-focused browsing paradigms, and understand how blog content is consumed, which is of interest to blog authors, big media, and search. We present a novel approach to blog subcommunity characterization by modeling individual blog readers using mixtures of an extension to the LDA family that jointly models phrases and time, Ngram Topic over Time (NTOT), and cluster with a number of similarity measures using Affinity Propagation. We experiment with two datasets: a small set of blogs whose authors provide feedback, and a set of popular, highly commented blogs, which provide indicators of algorithm scalability and interpretability without prior knowledge of a given blog. The results offer useful insight to the blog authors about their commenting community, and are observed to offer an integrated perspective on the topics of discussion and members engaged in those discussions for unfamiliar blogs. Our approach also holds promise as a component of solutions to related problems, such as online entity resolution and role discovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "affinity propagation; Blog; topic models; Web communities",
}

@Article{Kiciman:2010:APR,
  author =       "Emre Kiciman and Benjamin Livshits",
  title =        "{AjaxScope}: a Platform for Remotely Monitoring the Client-Side Behavior of {Web 2.0} Applications",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1841909.1841910",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bex:2010:LDR,
  author =       "Geert Jan Bex and Wouter Gelade and Frank Neven and Stijn Vansummeren",
  title =        "Learning Deterministic Regular Expressions for the Inference of Schemas from {XML} Data",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1841909.1841911",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bailey:2010:MHQ,
  author =       "Peter Bailey and Ryen W. White and Han Liu and Giridhar Kumaran",
  title =        "Mining Historic Query Trails to Label Long and Rare Search Engine Queries",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1841909.1841912",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Claude:2010:FCW,
  author =       "Francisco Claude and Gonzalo Navarro",
  title =        "Fast and Compact {Web} Graph Representations",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1841909.1841913",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Swaminathan:2010:RRM,
  author =       "Ashwin Swaminathan and Renan G. Cattelan and Ydo Wexler and Cherian V. Mathew and Darko Kirovski",
  title =        "Relating Reputation and Money in Online Markets",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1841909.1841914",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Web",
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Riedl:2011:ISI,
  author =       "John Riedl and Barry Smyth",
  title =        "Introduction to special issue on recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921591.1921592",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cacheda:2011:CCF,
  author =       "Fidel Cacheda and V{\'\i}ctor Carneiro and Diego Fern{\'a}ndez and Vreixo Formoso",
  title =        "Comparison of collaborative filtering algorithms: Limitations of current techniques and proposals for scalable, high-performance recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921591.1921593",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Umyarov:2011:UEA,
  author =       "Akhmed Umyarov and Alexander Tuzhilin",
  title =        "Using external aggregate ratings for improving individual recommendations",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921591.1921594",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Song:2011:ATR,
  author =       "Yang Song and Lu Zhang and C. Lee Giles",
  title =        "Automatic tag recommendation algorithms for social recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921591.1921595",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zheng:2011:RFL,
  author =       "Yu Zheng and Lizhu Zhang and Zhengxin Ma and Xing Xie and Wei-Ying Ma",
  title =        "Recommending friends and locations based on individual location history",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921591.1921596",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Wu:2011:TDQ,
  author =       "Mingfang Wu and Falk Scholer and Andrew Turpin",
  title =        "Topic Distillation with Query-Dependent Link Connections and Page Characteristics",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1961659.1961660",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Hurley:2011:HBP,
  author =       "John Hurley and Emi Garcia-Palacios and Sakir Sezer",
  title =        "Host-Based {P2P} Flow Identification and Use in Real-Time",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1961659.1961661",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Mitra:2011:CWB,
  author =       "Siddharth Mitra and Mayank Agrawal and Amit Yadav and Niklas Carlsson and Derek Eager and Anirban Mahanti",
  title =        "Characterizing {Web}-Based Video Sharing Workloads",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1961659.1961662",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ozcan:2011:CAS,
  author =       "Rifat Ozcan and Ismail Sengor Altingovde and {\"O}zg{\"u}r Ulusoy",
  title =        "Cost-Aware Strategies for Query Result Caching in {Web} Search Engines",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1961659.1961663",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Valderas:2011:SRS,
  author =       "Pedro Valderas and Vicente Pelechano",
  title =        "A Survey of Requirements Specification in Model-Driven Development of {Web} Applications",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1961659.1961664",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Web",
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Grier:2011:DIO,
  author =       "Chris Grier and Shuo Tang and Samuel T.
King", title = "Designing and Implementing the {OP} and {OP2} {Web} Browsers", journal = j-TWEB, volume = "5", number = "2", pages = "11:1--11:??", month = may, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1961659.1961665", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Tue Jun 7 18:44:15 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Koutsonikola:2011:CDL, author = "Vassiliki Koutsonikola and Athena Vakali", title = "A Clustering-Driven {LDAP} Framework", journal = j-TWEB, volume = "5", number = "3", pages = "12:1--12:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993054", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Paci:2011:AAC, author = "Federica Paci and Massimo Mecella and Mourad Ouzzani and Elisa Bertino", title = "{ACConv} -- An Access Control Model for Conversational {Web} Services", journal = j-TWEB, volume = "5", number = "3", pages = "13:1--13:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993055", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zeginis:2011:CDR, author = "Dimitris Zeginis and Yannis Tzitzikas and Vassilis Christophides", title = "On Computing Deltas of {RDF/S} Knowledge Bases", journal = j-TWEB, volume = "5", number = "3", pages = "14:1--14:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993056", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Baykan:2011:CSF, author = "Eda Baykan and Monika Henzinger and Ludmila Marian and Ingmar Weber", title = "A Comprehensive Study of Features and Algorithms for {URL}-Based Topic Classification", journal = j-TWEB, volume = "5", number = "3", pages = "15:1--15:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993057", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tuchinda:2011:BMD, author = "Rattapoom Tuchinda and Craig A. 
Knoblock and Pedro Szekely", title = "Building Mashups by Demonstration", journal = j-TWEB, volume = "5", number = "3", pages = "16:1--16:??", month = jul, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993053.1993058", ISSN = "1559-1131 (print), 1559-114X (electronic)", bibdate = "Thu Aug 18 13:57:29 MDT 2011", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alzoubi:2011:PAA, author = "Hussein A. Alzoubi and Seungjoon Lee and Michael Rabinovich and Oliver Spatscheck and Jacobus {Van Der Merwe}", title = "A Practical Architecture for an {Anycast CDN}", journal = j-TWEB, volume = "5", number = "4", pages = "17:1--17:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019644", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "IP Anycast has many attractive features for any service that involve the replication of multiple instances across the Internet. IP Anycast allows multiple instances of the same service to be `naturally' discovered, and requests for this service to be delivered to the closest instance. However, while briefly considered as an enabler for content delivery networks (CDNs) when they first emerged, IP Anycast was deemed infeasible in that environment. The main reasons for this decision were the lack of load awareness of IP Anycast and unwanted side effects of Internet routing changes on the IP Anycast mechanism. In this article we re-evaluate IP Anycast for CDNs by proposing a load-aware IP Anycast CDN architecture. 
Our architecture is prompted by recent developments in route control technology, as well as better understanding of the behavior of IP Anycast in operational settings. Our architecture makes use of route control mechanisms to take server and network load into account to realize load-aware Anycast. We show that the resulting redirection requirements can be formulated as a Generalized Assignment Problem and present practical algorithms that address these requirements while at the same time limiting connection disruptions that plague regular IP Anycast. We evaluate our algorithms through trace based simulation using traces obtained from a production CDN network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bar-Yossef:2011:ESE, author = "Ziv Bar-Yossef and Maxim Gurevich", title = "Efficient Search Engine Measurements", journal = j-TWEB, volume = "5", number = "4", pages = "18:1--18:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019645", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We address the problem of externally measuring aggregate functions over documents indexed by search engines, like corpus size, index freshness, and density of duplicates in the corpus. State of the art estimators for such quantities [Bar-Yossef and Gurevich 2008b; Broder et al. 2006] are biased due to inaccurate approximation of the so called `document degrees'. In addition, the estimators in Bar-Yossef and Gurevich [2008b] are quite costly, due to their reliance on rejection sampling. We present new estimators that are able to overcome the bias introduced by approximate degrees. 
Our estimators are based on a careful implementation of an approximate importance sampling procedure. Comprehensive theoretical and empirical analysis of the estimators demonstrates that they have essentially no bias even in situations where document degrees are poorly approximated. By avoiding the costly rejection sampling approach, our new importance sampling estimators are significantly more efficient than the estimators proposed in Bar-Yossef and Gurevich [2008b]. Furthermore, building on an idea from Broder et al. [2006], we discuss Rao-Blackwellization as a generic method for reducing variance in search engine estimators. We show that Rao-Blackwellizing our estimators results in performance improvements, without compromising accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gill:2011:COU, author = "Phillipa Gill and Martin Arlitt and Niklas Carlsson and Anirban Mahanti and Carey Williamson", title = "Characterizing Organizational Use of {Web}-Based Services: Methodology, Challenges, Observations, and Insights", journal = j-TWEB, volume = "5", number = "4", pages = "19:1--19:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Today's Web provides many different functionalities, including communication, entertainment, social networking, and information retrieval. In this article, we analyze traces of HTTP activity from a large enterprise and from a large university to identify and characterize Web-based service usage. Our work provides an initial methodology for the analysis of Web-based services. 
While it is nontrivial to identify the classes, instances, and providers for each transaction, our results show that most of the traffic comes from a small subset of providers, which can be classified manually. Furthermore, we assess both qualitatively and quantitatively how the Web has evolved over the past decade, and discuss the implications of these changes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singla:2011:CBC, author = "Adish Singla and Ingmar Weber", title = "Camera Brand Congruence and Camera Model Propagation in the {Flickr} Social Graph", journal = j-TWEB, volume = "5", number = "4", pages = "20:1--20:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019647", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given that my friends on Flickr use cameras of brand X, am I more likely to also use a camera of brand X? Given that one of these friends changes her brand, am I likely to do the same? Do new camera models pop up uniformly in the friendship graph? Or do early adopters then `convert' their friends? Which factors influence the conversion probability of a user? These are the kind of questions addressed in this work. Direct applications involve personalized advertising in social networks. For our study, we crawled a complete connected component of the Flickr friendship graph with a total of 67M edges and 3.9M users. 1.2M of these users had at least one public photograph with valid model metadata, which allowed us to assign camera brands and models to users and time slots. 
Similarly, we used, where provided in a user's profile, information about a user's geographic location and the groups joined on Flickr. Concerning brand congruence, our main findings are the following. First, a pair of friends on Flickr has a higher probability of being congruent, that is, using the same brand, compared to two random users (27\% vs. 19\%). Second, the degree of congruence goes up for pairs of friends (i) in the same country (29\%), (ii) who both only have very few friends (30\%), and (iii) with a very high cliqueness (38\%). Third, given that a user changes her camera model between March-May 2007 and March-May 2008, high cliqueness friends are more likely than random users to do the same (54\% vs. 48\%). Fourth, users using high-end cameras are far more loyal to their brand than users using point-and-shoot cameras, with a probability of staying with the same brand of 60\% vs 33\%, given that a new camera is bought. Fifth, these `expert' users' brand congruence reaches 66\% for high cliqueness friends. All these differences are statistically significant at 1\%. As for the propagation of new models in the friendship graph, we observe the following. First, the growth of connected components of users converted to a particular, new camera model differs distinctly from random growth. Second, the decline of dissemination of a particular model is close to random decline. This illustrates that users influence their friends to change to a particular new model, rather than from a particular old model. Third, having many converted friends increases the probability of the user to convert herself. Here differences between friends from the same or from different countries are more pronounced for point-and-shoot than for digital single-lens reflex users. Fourth, there was again a distinct difference between arbitrary friends and high cliqueness friends in terms of prediction quality for conversion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Curlango-Rosas:2011:SSA, author = "Cecilia Curlango-Rosas and Gregorio A. Ponce and Gabriel A. Lopez-Morteo", title = "A Specialized Search Assistant for Learning Objects", journal = j-TWEB, volume = "5", number = "4", pages = "21:1--21:??", month = oct, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2019643.2019648", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:40 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web holds a great quantity of material that can be used to enhance classroom instruction. However, it is not easy to retrieve this material with the search engines currently available. This study produced a specialized search assistant based on Google that significantly increases the number of instances in which teachers find the desired learning objects as compared to using this popular public search engine directly. Success in finding learning objects by study participants went from 80\% using Google alone to 96\% when using our search assistant in one scenario and, in another scenario, from a 40\% success rate with Google alone to 66\% with our assistant. This specialized search assistant implements features such as bilingual search and term suggestion which were requested by teacher participants to help improve their searches. Study participants evaluated the specialized search assistant and found it significantly easier to use and more useful than the popular search engine for the purpose of finding learning objects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhu:2012:CLS, author = "Guangyu Zhu and Gilad Mishne", title = "{ClickRank}: Learning Session-Context Models to Enrich {Web} Search Ranking", journal = j-TWEB, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109206", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User browsing information, particularly non-search-related activity, reveals important contextual information on the preferences and intents of Web users. In this article, we demonstrate the importance of mining general Web user behavior data to improve ranking and other Web-search experience, with an emphasis on analyzing individual user sessions for creating aggregate models. In this context, we introduce ClickRank, an efficient, scalable algorithm for estimating Webpage and Website importance from general Web user-behavior data. We lay out the theoretical foundation of ClickRank based on an intentional surfer model and discuss its properties. We quantitatively evaluate its effectiveness regarding the problem of Web-search ranking, showing that it contributes significantly to retrieval performance as a novel Web-search feature. We demonstrate that the results produced by ClickRank for Web-search ranking are highly competitive with those produced by other approaches, yet achieved at better scalability and substantially lower computational costs. Finally, we discuss novel applications of ClickRank in providing enriched user Web-search experience, highlighting the usefulness of our approach for nonranking tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2012:IWS, author = "Yiqun Liu and Fei Chen and Weize Kong and Huijia Yu and Min Zhang and Shaoping Ma and Liyun Ru", title = "Identifying {Web} Spam with the Wisdom of the Crowds", journal = j-TWEB, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109207", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mesbah:2012:CAB, author = "Ali Mesbah and Arie van Deursen and Stefan Lenselink", title = "Crawling {Ajax}-Based {Web} Applications through Dynamic Analysis of User Interface State Changes", journal = j-TWEB, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109208", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Using JavaScript and dynamic DOM manipulation on the client side of Web applications is becoming a widespread approach for achieving rich interactivity and responsiveness in modern Web applications. At the same time, such techniques---collectively known as Ajax---shatter the concept of webpages with unique URLs, on which traditional Web crawlers are based. This article describes a novel technique for crawling Ajax-based applications through automatic dynamic analysis of user-interface-state changes in Web browsers. Our algorithm scans the DOM tree, spots candidate elements that are capable of changing the state, fires events on those candidate elements, and incrementally infers a state machine that models the various navigational paths and states within an Ajax application. This inferred model can be used in program comprehension and in analysis and testing of dynamic Web states, for instance, or for generating a static version of the application. In this article, we discuss our sequential and concurrent Ajax crawling algorithms. We present our open source tool called Crawljax, which implements the concepts and algorithms discussed in this article. 
Additionally, we report a number of empirical studies in which we apply our approach to a number of open-source and industrial Web applications and elaborate on the obtained results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lauw:2012:QLO, author = "Hady W. Lauw and Ee-Peng Lim and Ke Wang", title = "Quality and Leniency in Online Collaborative Rating Systems", journal = j-TWEB, volume = "6", number = "1", pages = "4:1--4:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2109205.2109209", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Mar 16 12:37:41 MDT 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The emerging trend of social information processing has resulted in Web users' increased reliance on user-generated content contributed by others for information searching and decision making. Rating scores, a form of user-generated content contributed by reviewers in online rating systems, allow users to leverage others' opinions in the evaluation of objects. In this article, we focus on the problem of summarizing the rating scores given to an object into an overall score that reflects the object's quality. We observe that the existing approaches for summarizing scores largely ignores the effect of reviewers exercising different standards in assigning scores. Instead of treating all reviewers as equals, our approach models the leniency of reviewers, which refers to the tendency of a reviewer to assign higher scores than other coreviewers. 
Our approach is underlined by two insights: (1) The leniency of a reviewer depends not only on how the reviewer rates objects, but also on how other reviewers rate those objects and (2) The leniency of a reviewer and the quality of rated objects are mutually dependent. We develop the leniency-aware quality, or LQ model, which solves leniency and quality simultaneously. We introduce both an exact and a ranked solution to the model. Experiments on real-life and synthetic datasets show that LQ is more effective than comparable approaches. LQ is also shown to perform consistently better under different parameter settings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ashman:2012:E, author = "Helen Ashman and Arun Iyengar and Marc Najork", title = "Editorial", journal = j-TWEB, volume = "6", number = "2", pages = "5:1--5:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180862", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{DeCapitaniDiVimercati:2012:ITM, author = "Sabrina {De Capitani Di Vimercati} and Sara Foresti and Sushil Jajodia and Stefano Paraboschi and Giuseppe Psaila and Pierangela Samarati", title = "Integrating trust management and access control in data-intensive {Web} applications", journal = j-TWEB, volume = "6", number = "2", pages = "6:1--6:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180863", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The widespread diffusion of Web-based services provided by public and private organizations emphasizes the need for a flexible solution for protecting the information accessible through Web applications. A promising approach is represented by credential-based access control and trust management. However, although much research has been done and several proposals exist, a clear obstacle to the realization of their benefits in data-intensive Web applications is represented by the lack of adequate support in the DBMSs. As a matter of fact, DBMSs are often responsible for the management of most of the information that is accessed using a Web browser or a Web service invocation. In this article, we aim at eliminating this gap, and present an approach integrating trust management with the access control of the DBMS. We propose a trust model with a SQL syntax and illustrate an algorithm for the efficient verification of a delegation path for certificates. Our solution nicely complements current trust management proposals allowing the efficient realization of the services of an advanced trust management model within current relational DBMSs. 
An important benefit of our approach lies in its potential for a robust end-to-end design of security for personal data in Web scenario, where vulnerabilities of Web applications cannot be used to violate the protection of the data residing on the database server. We also illustrate the implementation of our approach within an open-source DBMS discussing design choices and performance impact.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alrifai:2012:HAE, author = "Mohammad Alrifai and Thomas Risse and Wolfgang Nejdl", title = "A hybrid approach for efficient {Web} service composition with end-to-end {QoS} constraints", journal = j-TWEB, volume = "6", number = "2", pages = "7:1--7:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180864", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Dynamic selection of Web services at runtime is important for building flexible and loosely-coupled service-oriented applications. An abstract description of the required services is provided at design-time, and matching service offers are located at runtime. With the growing number of Web services that provide the same functionality but differ in quality parameters (e.g., availability, response time), a decision needs to be made on which services should be selected such that the user's end-to-end QoS requirements are satisfied. Although very efficient, local selection strategy fails short in handling global QoS requirements. Solutions based on global optimization, on the other hand, can handle global constraints, but their poor performance renders them inappropriate for applications with dynamic and realtime requirements. 
In this article we address this problem and propose a hybrid solution that combines global optimization with local selection techniques to benefit from the advantages of both worlds. The proposed solution consists of two steps: first, we use mixed integer programming (MIP) to find the optimal decomposition of global QoS constraints into local constraints. Second, we use distributed local selection to find the best Web services that satisfy these local constraints. The results of experimental evaluation indicate that our approach significantly outperforms existing solutions in terms of computation time while achieving close-to-optimal results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Desnoyers:2012:MAM, author = "Peter Desnoyers and Timothy Wood and Prashant Shenoy and Rahul Singh and Sangameshwar Patil and Harrick Vin", title = "{Modellus}: Automated modeling of complex {Internet} data center applications", journal = j-TWEB, volume = "6", number = "2", pages = "8:1--8:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180865", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The rising complexity of distributed server applications in Internet data centers has made the tasks of modeling and analyzing their behavior increasingly difficult. This article presents Modellus, a novel system for automated modeling of complex web-based data center applications using methods from queuing theory, data mining, and machine learning. 
Modellus uses queuing theory and statistical methods to automatically derive models to predict the resource usage of an application and the workload it triggers; these models can be composed to capture multiple dependencies between interacting applications. Model accuracy is maintained by fast, distributed testing, automated relearning of models when they change, and methods to bound prediction errors in composite models. We have implemented a prototype of Modellus, deployed it on a data center testbed, and evaluated its efficacy for modeling and analysis of several distributed multitier web applications. Our results show that this feature-based modeling technique is able to make predictions across several data center tiers, and maintain predictive accuracy (typically 95\% or better) in the face of significant shifts in workload composition; we also demonstrate practical applications of the Modellus system to prediction and provisioning of real-world data center applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Aiello:2012:FPH, author = "Luca Maria Aiello and Alain Barrat and Rossano Schifanella and Ciro Cattuto and Benjamin Markines and Filippo Menczer", title = "Friendship prediction and homophily in social media", journal = j-TWEB, volume = "6", number = "2", pages = "9:1--9:??", month = may, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180861.2180866", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:48 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social media have attracted considerable attention because their open-ended nature allows users to create lightweight semantic scaffolding to organize and share content. 
To date, the interplay of the social and topical components of social media has been only partially explored. Here, we study the presence of homophily in three systems that combine tagging social media with online social networks. We find a substantial level of topical similarity among users who are close to each other in the social network. We introduce a null model that preserves user activity while removing local correlations, allowing us to disentangle the actual local similarity between users from statistical effects due to the assortative mixing of user activity and centrality in the social network. This analysis suggests that users with similar interests are more likely to be friends, and therefore topical similarity measures among users based solely on their annotation metadata should be predictive of social links. We test this hypothesis on several datasets, confirming that social networks constructed from topical similarity capture actual friendship accurately. When combined with topological features, topical similarity achieves a link prediction accuracy of about 92\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Comai:2012:MDM, author = "Sara Comai and Davide Mazza", title = "A model-driven methodology to the content layout problem in {Web} applications", journal = j-TWEB, volume = "6", number = "3", pages = "10:1--10:38", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344417", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/texbook3.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article presents a model-driven approach for the design of the layout in a complex Web application, where large amounts of data are accessed. The aim of this work is to reduce, as much as possible, repetitive tasks and to factor out common aspects into different kinds of rules that can be reused across different applications. In particular, exploiting the conceptual elements of the typical models used for the design of a Web application, it defines presentation and layout rules at different levels of abstraction and granularity. A procedure for the automatic layout of the content of a page is proposed and evaluated, and the layout of advanced Web applications is discussed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "Automatic contents layout; graphical visualization and rendering; Web applications design", } @Article{Merhav:2012:EIN, author = "Yuval Merhav and Filipe Mesquita and Denilson Barbosa and Wai Gen Yee and Ophir Frieder", title = "Extracting information networks from the blogosphere", journal = j-TWEB, volume = "6", number = "3", pages = "11:1--11:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344418", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We study the problem of automatically extracting information networks formed by recognizable entities as well as relations among them from social media sites. Our approach consists of using state-of-the-art natural language processing tools to identify entities and extract sentences that relate such entities, followed by using text-clustering algorithms to identify the relations within the information network. We propose a new term-weighting scheme that significantly improves on the state-of-the-art in the task of relation extraction, both when used in conjunction with the standard tf $ \cdot $ idf scheme and also when used as a pruning filter. We describe an effective method for identifying benchmarks for open information extraction that relies on a curated online database that is comparable to the hand-crafted evaluation datasets in the literature. From this benchmark, we derive a much larger dataset which mimics realistic conditions for the task of open information extraction. 
We report on extensive experiments on both datasets, which not only shed light on the accuracy levels achieved by state-of-the-art open information extraction tools, but also on how to tune such tools for better results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Miliaraki:2012:FDS, author = "Iris Miliaraki and Manolis Koubarakis", title = "{FoXtrot}: Distributed structural and value {XML} filtering", journal = j-TWEB, volume = "6", number = "3", pages = "12:1--12:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344419", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Publish/subscribe systems have emerged in recent years as a promising paradigm for offering various popular notification services. In this context, many XML filtering systems have been proposed to efficiently identify XML data that matches user interests expressed as queries in an XML query language like XPath. However, in order to offer XML filtering functionality on an Internet-scale, we need to deploy such a service in a distributed environment, avoiding bottlenecks that can deteriorate performance. In this work, we design and implement FoXtrot, a system for filtering XML data that combines the strengths of automata for efficient filtering and distributed hash tables for building a fully distributed system. Apart from structural-matching, performed using automata, we also discuss different methods for evaluating value-based predicates. 
We perform an extensive experimental evaluation of our system, FoXtrot, on a local cluster and on the PlanetLab network and demonstrate that it can index millions of user queries, achieving a high indexing and filtering throughput. At the same time, FoXtrot exhibits very good load-balancing properties and improves its performance as we increase the size of the network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dork:2012:NTW, author = "Marian D{\"o}rk and Carey Williamson and Sheelagh Carpendale", title = "Navigating tomorrow's web: From searching and browsing to visual exploration", journal = j-TWEB, volume = "6", number = "3", pages = "13:1--13:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2344416.2344420", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Nov 6 19:07:49 MST 2012", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We propose a new way of navigating the Web using interactive information visualizations, and present encouraging results from a large-scale Web study of a visual exploration system. While the Web has become an immense, diverse information space, it has also evolved into a powerful software platform. We believe that the established interaction techniques of searching and browsing do not sufficiently utilize these advances, since information seekers have to transform their information needs into specific, text-based search queries resulting in mostly text-based lists of resources. In contrast, we foresee a new type of information seeking that is high-level and more engaging, by providing the information seeker with interactive visualizations that give graphical overviews and enable query formulation. 
Building on recent work on faceted navigation, information visualization, and exploratory search, we conceptualize this type of information navigation as visual exploration and evaluate a prototype Web-based system that implements it. We discuss the results of a large-scale, mixed-method Web study that provides a better understanding of the potential benefits of visual exploration on the Web, and its particular performance challenges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2012:CBQ, author = "B. Barla Cambazoglu and Ismail Sengor Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy", title = "Cache-Based Query Processing for Search Engines", journal = j-TWEB, volume = "6", number = "4", pages = "14:1--14:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382617", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In practice, a search engine may fail to serve a query due to various reasons such as hardware/network failures, excessive query load, lack of matching documents, or service contract limitations (e.g., the query rate limits for third-party users of a search service). In this kind of scenarios, where the backend search system is unable to generate answers to queries, approximate answers can be generated by exploiting the previously computed query results available in the result cache of the search engine. In this work, we propose two alternative strategies to implement this cache-based query processing idea. The first strategy aggregates the results of similar queries that are previously cached in order to create synthetic results for new queries. 
The second strategy forms an inverted index over the textual information (i.e., query terms and result snippets) present in the result cache and uses this index to answer new queries. Both approaches achieve reasonable result qualities compared to processing queries with an inverted index built on the collection.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Delac:2012:MSS, author = "Goran Delac and Ivan Budiselic and Ivan Zuzak and Ivan Skuliber and Tomislav Stefanec", title = "A Methodology for {SIP} and {SOAP} Integration Using Application-Specific Protocol Conversion", journal = j-TWEB, volume = "6", number = "4", pages = "15:1--15:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382618", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent years, the ubiquitous demands for cross-protocol application access are driving the need for deeper integration between SIP and SOAP. In this article we present a novel methodology for integrating these two protocols. Through an analysis of properties of SIP and SOAP we show that integration between these protocols should be based on application-specific converters. We describe a generic SIP/SOAP gateway that implements message handling and network and storage management while relying on application-specific converters to define session management and message mapping for a specific set of SIP and SOAP communication nodes. In order to ease development of these converters, we introduce an XML-based domain-specific language for describing application-specific conversion processes. 
We show how conversion processes can be easily specified in the language using message sequence diagrams of the desired interaction. We evaluate the presented methodology through performance analysis of the developed prototype gateway and high-level comparison with other solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jeon:2012:WCP, author = "Myeongjae Jeon and Youngjae Kim and Jeaho Hwang and Joonwon Lee and Euiseong Seo", title = "Workload Characterization and Performance Implications of Large-Scale Blog Servers", journal = j-TWEB, volume = "6", number = "4", pages = "16:1--16:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382619", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the ever-increasing popularity of Social Network Services (SNSs), an understanding of the characteristics of these services and their effects on the behavior of their host servers is critical. However, there has been a lack of research on the workload characterization of servers running SNS applications such as blog services. To fill this void, we empirically characterized real-world Web server logs collected from one of the largest South Korean blog hosting sites for 12 consecutive days. The logs consist of more than 96 million HTTP requests and 4.7TB of network traffic. Our analysis reveals the following: (i) The transfer size of nonmultimedia files and blog articles can be modeled using a truncated Pareto distribution and a log-normal distribution, respectively; (ii) user access for blog articles does not show temporal locality, but is strongly biased towards those posted with image or audio files. 
We additionally discuss the potential performance improvement through clustering of small files on a blog page into contiguous disk blocks, which benefits from the observed file access patterns. Trace-driven simulations show that, on average, the suggested approach achieves 60.6\% better system throughput and reduces the processing time for file access by 30.8\% compared to the best performance of the Ext4 filesystem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wilson:2012:BSG, author = "Christo Wilson and Alessandra Sala and Krishna P. N. Puttaswamy and Ben Y. Zhao", title = "Beyond Social Graphs: User Interactions in Online Social Networks and their Implications", journal = j-TWEB, volume = "6", number = "4", pages = "17:1--17:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382620", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social networks are popular platforms for interaction, communication, and collaboration between friends. Researchers have recently proposed an emerging class of applications that leverage relationships from social networks to improve security and performance in applications such as email, Web browsing, and overlay routing. While these applications often cite social network connectivity statistics to support their designs, researchers in psychology and sociology have repeatedly cast doubt on the practice of inferring meaningful relationships from social network connections alone. This leads to the question: ``Are social links valid indicators of real user interaction? 
If not, then how can we quantify these factors to form a more accurate model for evaluating socially enhanced applications?'' In this article, we address this question through a detailed study of user interactions in the Facebook social network. We propose the use of ``interaction graphs'' to impart meaning to online social links by quantifying user interactions. We analyze interaction graphs derived from Facebook user traces and show that they exhibit significantly lower levels of the ``small-world'' properties present in their social graph counterparts. This means that these graphs have fewer ``supernodes'' with extremely high degree, and overall graph diameter increases significantly as a result. To quantify the impact of our observations, we use both types of graphs to validate several well-known social-based applications that rely on graph properties to infuse new functionality into Internet applications, including Reliable Email (RE), SybilGuard, and the weighted cascade influence maximization algorithm. The results reveal new insights into each of these systems, and confirm our hypothesis that to obtain realistic and accurate results, ongoing research on social network applications should use real indicators of user interactions in lieu of social graphs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weerkamp:2012:EEC, author = "Wouter Weerkamp and Krisztian Balog and Maarten de Rijke", title = "Exploiting External Collections for Query Expansion", journal = j-TWEB, volume = "6", number = "4", pages = "18:1--18:??", month = nov, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382616.2382621", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A persisting challenge in the field of information retrieval is the vocabulary mismatch between a user's information need and the relevant documents. One way of addressing this issue is to apply query modeling: to add terms to the original query and reweigh the terms. In social media, where documents usually contain creative and noisy language (e.g., spelling and grammatical errors), query modeling proves difficult. To address this, attempts to use external sources for query modeling have been made and seem to be successful. In this article we propose a general generative query expansion model that uses external document collections for term generation: the External Expansion Model (EEM). The main rationale behind our model is our hypothesis that each query requires its own mixture of external collections for expansion and that an expansion model should account for this. For some queries we expect, for example, a news collection to be most beneficial, while for other queries we could benefit more by selecting terms from a general encyclopedia. EEM allows for query-dependent weighing of the external collections. 
We put our model to the test on the task of blog post retrieval and we use four external collections in our experiments: (i) a news collection, (ii) a Web collection, (iii) Wikipedia, and (iv) a blog post collection. Experiments show that EEM outperforms query expansion on the individual collections, as well as the Mixture of Relevance Models that was previously proposed by Diaz and Metzler [2006]. Extensive analysis of the results shows that our naive approach to estimating query-dependent collection importance works reasonably well and that, when we use ``oracle'' settings, we see the full potential of our model. We also find that the query-dependent collection importance has more impact on retrieval performance than the independent collection importance (i.e., a collection prior).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2013:MVC, author = "Ou Wu and Weiming Hu and Lei Shi", title = "Measuring the Visual Complexities of {Web} Pages", journal = j-TWEB, volume = "7", number = "1", pages = "1:1--1:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435216", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Visual complexities (VisComs) of Web pages significantly affect user experience, and automatic evaluation can facilitate a large number of Web-based applications. The construction of a model for measuring the VisComs of Web pages requires the extraction of typical features and learning based on labeled Web pages. However, as far as the authors are aware, little headway has been made on measuring VisCom in Web mining and machine learning. 
The present article provides a new approach combining Web mining techniques and machine learning algorithms for measuring the VisComs of Web pages. The structure of a Web page is first analyzed, and the layout is then extracted. Using a Web page as a semistructured image, three classes of features are extracted to construct a feature vector. The feature vector is fed into a learned measuring function to calculate the VisCom of the page. In the proposed approach of the present study, the type of the measuring function and its learning depend on the quantification strategy for VisCom. Aside from using a category and a score to represent VisCom as existing work, this study presents a new strategy utilizing a distribution to quantify the VisCom of a Web page. Empirical evaluation suggests the effectiveness of the proposed approach in terms of both features and learning algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hanson:2013:PWA, author = "Vicki L. Hanson and John T. Richards", title = "Progress on {Website} Accessibility?", journal = j-TWEB, volume = "7", number = "1", pages = "2:1--2:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435217", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Over 100 top-traffic and government websites from the United States and United Kingdom were examined for evidence of changes on accessibility indicators over the 14-year period from 1999 to 2012, the longest period studied to date. Automated analyses of WCAG 2.0 Level A Success Criteria found high percentages of violations overall. 
Unlike more circumscribed studies, however, these sites exhibited improvements over the years on a number of accessibility indicators, with government sites being less likely than topsites to have accessibility violations. Examination of the causes of success and failure suggests that improving accessibility may be due, in part, to changes in website technologies and coding practices rather than a focus on accessibility per se.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Baykan:2013:CST, author = "Eda Baykan and Monika Henzinger and Ingmar Weber", title = "A Comprehensive Study of Techniques for {URL}-Based {Web} Page Language Classification", journal = j-TWEB, volume = "7", number = "1", pages = "3:1--3:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435218", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given only the URL of a Web page, can we identify its language? In this article we examine this question. URL-based language classification is useful when the content of the Web page is not available or downloading the content is a waste of bandwidth and time. We built URL-based language classifiers for English, German, French, Spanish, and Italian by applying a variety of algorithms and features. As algorithms we used machine learning algorithms which are widely applied for text classification and state-of-art algorithms for language identification of text. As features we used words, various sized n-grams, and custom-made features (our novel feature set). 
We compared our approaches with two baseline methods, namely classification by country code top-level domains and classification by IP addresses of the hosting Web servers. We trained and tested our classifiers in a 10-fold cross-validation setup on a dataset obtained from the Open Directory Project and from querying a commercial search engine. We obtained the lowest F1-measure for English (94) and the highest F1-measure for German (98) with the best performing classifiers. We also evaluated the performance of our methods: (i) on a set of Web pages written in Adobe Flash and (ii) as part of a language-focused crawler. In the first case, the content of the Web page is hard to extract and in the second case downloading pages of the ``wrong'' language constitutes a waste of bandwidth. In both settings the best classifiers have a high accuracy with an F1-measure between 95 (for English) and 98 (for Italian) for the Adobe Flash pages and a precision between 90 (for Italian) and 97 (for French) for the language-focused crawler.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Marriott:2013:HAT, author = "Kim Marriott and Peter Moulder and Nathan Hurst", title = "{HTML} Automatic Table Layout", journal = j-TWEB, volume = "7", number = "1", pages = "4:1--4:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435215.2435219", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sun May 5 09:27:25 MDT 2013", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Automatic layout of tables is required in online applications because of the need to tailor the layout to the viewport width, choice of font, and dynamic content. 
However, if the table contains text, minimizing the height of the table for a fixed maximum width is NP-hard. Thus, more efficient heuristic algorithms are required. We evaluate the HTML table layout recommendation and find that while it generally produces quite compact layout it is brittle and can lead to quite uncompact layout. We present an alternate heuristic algorithm. It uses a greedy strategy that starts from the widest reasonable layout and repeatedly chooses to narrow the column for which narrowing leads to the least increase in table height. The algorithm is simple, fast enough to be used in online applications, and gives significantly more compact layout than is obtained with HTML's recommended table layout algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anisetti:2013:TBS, author = "Marco Anisetti and Claudio A. Ardagna and Ernesto Damiani and Francesco Saonara", title = "A test-based security certification scheme for {Web} services", journal = j-TWEB, volume = "7", number = "2", pages = "5:1--5:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460384", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Service-Oriented Architecture (SOA) paradigm is giving rise to a new generation of applications built by dynamically composing loosely coupled autonomous services. Clients (i.e., software agents acting on behalf of human users or service providers) implementing such complex applications typically search and integrate services on the basis of their functional requirements and of their trust in the service suppliers. 
A major issue in this scenario relates to the definition of an assurance technique allowing clients to select services on the basis of their nonfunctional requirements and increasing their confidence that the selected services will satisfy such requirements. In this article, we first present an assurance solution that focuses on security and supports a test-based security certification scheme for Web services. The certification scheme is driven by the security properties to be certified and relies upon a formal definition of the service model. The evidence supporting a certified property is computed using a model-based testing approach that, starting from the service model, automatically generates the test cases to be used in the service certification. We also define a set of indexes and metrics that evaluate the assurance level and the quality of the certification process. Finally, we present our evaluation toolkit and experimental results obtained applying our certification solution to a financial service implementing the Interactive Financial eXchange (IFX) standard.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Victor:2013:ETB, author = "Patricia Victor and Nele Verbiest and Chris Cornelis and Martine {De Cock}", title = "Enhancing the trust-based recommendation process with explicit distrust", journal = j-TWEB, volume = "7", number = "2", pages = "6:1--6:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460385", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "When a Web application with a built-in recommender offers a social networking component which enables its users to form a trust network, it can generate more personalized recommendations by combining user ratings with information from the trust network. These are the so-called trust-enhanced recommendation systems. While research on the incorporation of trust for recommendations is thriving, the potential of explicitly stated distrust remains almost unexplored. In this article, we introduce a distrust-enhanced recommendation algorithm which has its roots in Golbeck's trust-based weighted mean. Through experiments on a set of reviews from Epinions.com, we show that our new algorithm outperforms its standard trust-only counterpart with respect to accuracy, thereby demonstrating the positive effect that explicit distrust can have on trust-based recommendations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Yue:2013:MSI, author = "Chuan Yue and Haining Wang", title = "A measurement study of insecure {JavaScript} practices on the {Web}", journal = j-TWEB, volume = "7", number = "2", pages = "7:1--7:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460386", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "JavaScript is an interpreted programming language most often used for enhancing webpage interactivity and functionality. It has powerful capabilities to interact with webpage documents and browser windows, however, it has also opened the door for many browser-based security attacks. Insecure engineering practices of using JavaScript may not directly lead to security breaches, but they can create new attack vectors and greatly increase the risks of browser-based attacks. In this article, we present the first measurement study on insecure practices of using JavaScript on the Web. Our focus is on the insecure practices of JavaScript inclusion and dynamic generation, and we examine their severity and nature on 6,805 unique websites. 
Our measurement results reveal that insecure JavaScript practices are common at various websites: (1) at least 66.4\% of the measured websites manifest the insecure practices of including JavaScript files from external domains into the top-level documents of their webpages; (2) over 44.4\% of the measured websites use the dangerous eval() function to dynamically generate and execute JavaScript code on their webpages; and (3) in JavaScript dynamic generation, using the document.write() method and the innerHTML property is much more popular than using the relatively secure technique of creating script elements via DOM methods. Our analysis indicates that safe alternatives to these insecure practices exist in common cases and ought to be adopted by website developers and administrators for reducing potential security risks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2013:UQI, author = "Weifeng Su and Hejun Wu and Yafei Li and Jing Zhao and Frederick H. Lochovsky and Hongmin Cai and Tianqiang Huang", title = "Understanding query interfaces by statistical parsing", journal = j-TWEB, volume = "7", number = "2", pages = "8:1--8:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460387", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Users submit queries to an online database via its query interface. Query interface parsing, which is important for many applications, understands the query capabilities of a query interface. 
Since most query interfaces are organized hierarchically, we present a novel query interface parsing method, StatParser (Statistical Parser), to automatically extract the hierarchical query capabilities of query interfaces. StatParser automatically learns from a set of parsed query interfaces and parses new query interfaces. StatParser starts from a small grammar and enhances the grammar with a set of probabilities learned from parsed query interfaces under the maximum-entropy principle. Given a new query interface, the probability-enhanced grammar identifies the parse tree with the largest global probability to be the query capabilities of the query interface. Experimental results show that StatParser very accurately extracts the query capabilities and can effectively overcome the problems of existing query interface parsers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Diaz:2013:LEU, author = "Oscar D{\'\i}az and Crist{\'o}bal Arellano and Maider Azanza", title = "A language for end-user {Web} augmentation: Caring for producers and consumers alike", journal = j-TWEB, volume = "7", number = "2", pages = "9:1--9:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460388", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web augmentation is to the Web what augmented reality is to the physical world: layering relevant content/layout/navigation over the existing Web to customize the user experience. This is achieved through JavaScript (JS) using browser weavers (e.g., Greasemonkey). 
To date, over 43 million of downloads of Greasemonkey scripts ground the vitality of this movement. However, Web augmentation is hindered by being programming intensive and prone to malware. This prevents end-users from participating as both producers and consumers of scripts: producers need to know JS, consumers need to trust JS. This article aims at promoting end-user participation in both roles. The vision is for end-users to prosume (the act of simultaneously caring for producing and consuming) scripts as easily as they currently prosume their pictures or videos. Encouraging production requires more ``natural'' and abstract constructs. Promoting consumption calls for augmentation scripts to be easier to understand, share, and trust upon. To this end, we explore the use of Domain-Specific Languages (DSLs) by introducing Sticklet. Sticklet is an internal DSL on JS, where JS generality is reduced for the sake of learnability and reliability. Specifically, Web augmentation is conceived as fixing in existing web sites (i.e., the wall ) HTML fragments extracted from either other sites or Web services (i.e., the stickers ). Sticklet targets hobby programmers as producers, and computer literates as consumers. From a producer perspective, benefits are threefold. As a restricted grammar on top of JS, Sticklet expressions are domain oriented and more declarative than their JS counterparts, hence speeding up development. As syntactically correct JS expressions, Sticklet scripts can be installed as traditional scripts and hence, programmers can continue using existing JS tools. As declarative expressions, they are easier to maintain, and amenable for optimization. From a consumer perspective, domain specificity brings understandability (due to declarativeness), reliability (due to built-in security), and ``consumability'' (i.e., installation/enactment/sharing of Sticklet expressions are tuned to the shortage of time and skills of the target audience). 
Preliminary evaluations indicate that 77\% of the subjects were able to develop new Sticklet scripts in less than thirty minutes while 84\% were able to consume these scripts in less than ten minutes. Sticklet is available to download as a Mozilla add-on.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kaldeli:2013:CWS, author = "Eirini Kaldeli and Ehsan Ullah Warriach and Alexander Lazovik and Marco Aiello", title = "Coordinating the web of services for a smart home", journal = j-TWEB, volume = "7", number = "2", pages = "10:1--10:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460389", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Domotics, concerned with the realization of intelligent home environments, is a novel field which can highly benefit from solutions inspired by service-oriented principles to enhance the convenience and security of modern home residents. In this work, we present an architecture for a smart home, starting from the lower device interconnectivity level up to the higher application layers that undertake the load of complex functionalities and provide a number of services to end-users. We claim that in order for smart homes to exhibit a genuinely intelligent behavior, the ability to compute compositions of individual devices automatically and dynamically is paramount. To this end, we incorporate into the architecture a composition component that employs artificial intelligence domain-independent planning to generate compositions at runtime, in a constantly evolving environment. 
We have implemented a fully working prototype that realizes such an architecture, and have evaluated it both in terms of performance as well as from the end-user point of view. The results of the evaluation show that the service-oriented architectural design and the support for dynamic compositions is quite efficient from the technical point of view, and that the system succeeds in satisfying the expectations and objectives of the users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Balakrishnan:2013:ART, author = "Raju Balakrishnan and Subbarao Kambhampati and Manishkumar Jha", title = "Assessing relevance and trust of the deep web sources and results based on inter-source agreement", journal = j-TWEB, volume = "7", number = "2", pages = "11:1--11:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2460383.2460390", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:18 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Deep web search engines face the formidable challenge of retrieving high-quality results from the vast collection of searchable databases. Deep web search is a two-step process of selecting the high-quality sources and ranking the results from the selected sources. Though there are existing methods for both the steps, they assess the relevance of the sources and the results using the query-result similarity. When applied to the deep web these methods have two deficiencies. First is that they are agnostic to the correctness (trustworthiness) of the results. Second, the query-based relevance does not consider the importance of the results and sources. These two considerations are essential for the deep web and open collections in general. 
Since a number of deep web sources provide answers to any query, we conjecture that the agreements between these answers are helpful in assessing the importance and the trustworthiness of the sources and the results. For assessing source quality, we compute the agreement between the sources as the agreement of the answers returned. While computing the agreement, we also measure and compensate for the possible collusion between the sources. This adjusted agreement is modeled as a graph with sources at the vertices. On this agreement graph, a quality score of a source, that we call SourceRank, is calculated as the stationary visit probability of a random walk. For ranking results, we analyze the second-order agreement between the results. Further extending SourceRank to multidomain search, we propose a source ranking sensitive to the query domains. Multiple domain-specific rankings of a source are computed, and these ranks are combined for the final ranking. We perform extensive evaluations on online and hundreds of Google Base sources spanning across domains. The proposed result and source rankings are implemented in the deep web search engine Factal. We demonstrate that the agreement analysis tracks source corruption. Further, our relevance evaluations show that our methods improve precision significantly over Google Base and the other baseline methods. The result ranking and the domain-specific source ranking are evaluated separately.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Nguyen:2013:FWT, author = "Cam-Tu Nguyen and Natsuda Kaothanthong and Takeshi Tokuyama and Xuan-Hieu Phan", title = "A feature-word-topic model for image annotation and retrieval", journal = j-TWEB, volume = "7", number = "3", pages = "12:1--12:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516634", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Image annotation is a process of finding appropriate semantic labels for images in order to obtain a more convenient way for indexing and searching images on the Web. This article proposes a novel method for image annotation based on combining feature-word distributions, which map from visual space to word space, and word-topic distributions, which form a structure to capture label relationships for annotation. We refer to this type of model as Feature-Word-Topic models. The introduction of topics allows us to efficiently take word associations, such as {ocean, fish, coral} or {desert, sand, cactus}, into account for image annotation. Unlike previous topic-based methods, we do not consider topics as joint distributions of words and visual features, but as distributions of words only. Feature-word distributions are utilized to define weights in computation of topic distributions for annotation. By doing so, topic models in text mining can be applied directly in our method. 
Our Feature-word-topic model, which exploits Gaussian Mixtures for feature-word distributions, and probabilistic Latent Semantic Analysis (pLSA) for word-topic distributions, shows that our method is able to obtain promising results in image annotation and retrieval.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vargiu:2013:ICA, author = "Eloisa Vargiu and Alessandro Giuliani and Giuliano Armano", title = "Improving contextual advertising by adopting collaborative filtering", journal = j-TWEB, volume = "7", number = "3", pages = "13:1--13:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516635", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising can be viewed as an information filtering task aimed at selecting suitable ads to be suggested to the final ``user'', that is, the Web page in hand. Starting from this insight, in this article we propose a novel system, which adopts a collaborative filtering approach to perform contextual advertising. In particular, given a Web page, the system relies on collaborative filtering to classify the page content and to suggest suitable ads accordingly. Useful information is extracted from ``inlinks'', that is, similar pages that link to the Web page in hand. In so doing, collaborative filtering is used in a content-based setting, giving rise to a hybrid contextual advertising system. After being implemented, the system has been experimented with about 15000 Web pages extracted from the Open Directory Project. Comparative experiments with a content-based system have been performed. The corresponding results highlight that the proposed system performs better. 
A suitable case study is also provided to enable the reader to better understand how the system works and its effectiveness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Conti:2013:VPS, author = "Mauro Conti and Arbnor Hasani and Bruno Crispo", title = "Virtual private social networks and a {Facebook} implementation", journal = j-TWEB, volume = "7", number = "3", pages = "14:1--14:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516636", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The popularity of Social Networking Sites (SNS) is growing rapidly, with the largest sites serving hundreds of millions of users and their private information. The privacy settings of these SNSs do not allow the user to avoid sharing some information (e.g., name and profile picture) with all the other users. Also, no matter the privacy settings, this information is always shared with the SNS (that could sell this information or be hacked). To mitigate these threats, we recently introduced the concept of Virtual Private Social Networks (VPSNs). In this work we propose the first complete architecture and implementation of VPSNs for Facebook. In particular, we address an important problem left unexplored in our previous research---that is the automatic propagation of updated profiles to all the members of the same VPSN. Furthermore, we made an in-depth study on performance and implemented several optimizations to reduce the impact of VPSN on user experience.
The proposed solution is lightweight, completely distributed, does not depend on the collaboration from Facebook, does not have a central point of failure, it offers (with some limitations) the same functionality as Facebook, and apart from some simple settings, the solution is almost transparent to the user. Through experiments, with an extended set of parameters, we have confirmed the feasibility of the proposal and have shown a very limited time-overhead experienced by the user while browsing Facebook pages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cambazoglu:2013:TBI, author = "B. Barla Cambazoglu and Enver Kayaaslan and Simon Jonassen and Cevdet Aykanat", title = "A term-based inverted index partitioning model for efficient distributed query processing", journal = j-TWEB, volume = "7", number = "3", pages = "15:1--15:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516637", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In a shared-nothing, distributed text retrieval system, queries are processed over an inverted index that is partitioned among a number of index servers. In practice, the index is either document-based or term-based partitioned. This choice is made depending on the properties of the underlying hardware infrastructure, query traffic distribution, and some performance and availability constraints.
In query processing on retrieval systems that adopt a term-based index partitioning strategy, the high communication overhead due to the transfer of large amounts of data from the index servers forms a major performance bottleneck, deteriorating the scalability of the entire distributed retrieval system. In this work, to alleviate this problem, we propose a novel inverted index partitioning model that relies on hypergraph partitioning. In the proposed model, concurrently accessed index entries are assigned to the same index servers, based on the inverted index access patterns extracted from the past query logs. The model aims to minimize the communication overhead that will be incurred by future queries while maintaining the computational load balance among the index servers. We evaluate the performance of the proposed model through extensive experiments using a real-life text collection and a search query sample. Our results show that considerable performance gains can be achieved relative to the term-based index partitioning strategies previously proposed in literature. In most cases, however, the performance remains inferior to that attained by document-based partitioning.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weninger:2013:PPF, author = "Tim Weninger and Thomas J. 
Johnston and Jiawei Han", title = "The parallel path framework for entity discovery on the web", journal = j-TWEB, volume = "7", number = "3", pages = "16:1--16:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516638", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It has been a dream of the database and Web communities to reconcile the unstructured nature of the World Wide Web with the neat, structured schemas of the database paradigm. Even though databases are currently used to generate Web content in some sites, the schemas of these databases are rarely consistent across a domain. This makes the comparison and aggregation of information from different domains difficult. We aim to make an important step towards resolving this disparity by using the structural and relational information on the Web to (1) extract Web lists, (2) find entity-pages, (3) map entity-pages to a database, and (4) extract attributes of the entities. Specifically, given a Web site and an entity-page (e.g., university department and faculty member home page) we seek to find all of the entity-pages of the same type (e.g., all faculty members in the department), as well as attributes of the specific entities (e.g., their phone numbers, email addresses, office numbers). To do this, we propose a Web structure mining method which grows parallel paths through the Web graph and DOM trees and propagates relevant attribute information forward. We show that by utilizing these parallel paths we can efficiently discover entity-pages and attributes. Finally, we demonstrate the accuracy of our method with a large case study.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2013:SCB, author = "Liwei Liu and Freddy Lecue and Nikolay Mehandjiev", title = "Semantic content-based recommendation of software services using context", journal = j-TWEB, volume = "7", number = "3", pages = "17:1--17:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2516633.2516639", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:20 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The current proliferation of software services means users should be supported when selecting one service out of the many which meet their needs. Recommender Systems provide such support for selecting products and conventional services, yet their direct application to software services is not straightforward, because of the current scarcity of available user feedback, and the need to fine-tune software services to the context of intended use. In this article, we address these issues by proposing a semantic content-based recommendation approach that analyzes the context of intended service use to provide effective recommendations in conditions of scarce user feedback. The article ends with two experiments based on a realistic set of semantic services. The first experiment demonstrates how the proposed semantic content-based approach can produce effective recommendations using semantic reasoning over service specifications by comparing it with three other approaches. The second experiment demonstrates the effectiveness of the proposed context analysis mechanism by comparing the performance of both context-aware and plain versions of our semantic content-based approach, benchmarked against user-performed selection informed by context.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jiang:2013:ULI, author = "Jing Jiang and Christo Wilson and Xiao Wang and Wenpeng Sha and Peng Huang and Yafei Dai and Ben Y. Zhao", title = "Understanding latent interactions in online social networks", journal = j-TWEB, volume = "7", number = "4", pages = "18:1--18:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2517040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Popular online social networks (OSNs) like Facebook and Twitter are changing the way users communicate and interact with the Internet. A deep understanding of user interactions in OSNs can provide important insights into questions of human social behavior and into the design of social platforms and applications. However, recent studies have shown that a majority of user interactions on OSNs are latent interactions, that is, passive actions, such as profile browsing, that cannot be observed by traditional measurement techniques. In this article, we seek a deeper understanding of both active and latent user interactions in OSNs. For quantifiable data on latent user interactions, we perform a detailed measurement study on Renren, the largest OSN in China with more than 220 million users to date. All friendship links in Renren are public, allowing us to exhaustively crawl a connected graph component of 42 million users and 1.66 billion social links in 2009. Renren also keeps detailed, publicly viewable visitor logs for each user profile. 
We capture detailed histories of profile visits over a period of 90 days for users in the Peking University Renren network and use statistics of profile visits to study issues of user profile popularity, reciprocity of profile visits, and the impact of content updates on user popularity. We find that latent interactions are much more prevalent and frequent than active events, are nonreciprocal in nature, and that profile popularity is correlated with page views of content rather than with quantity of content updates. Finally, we construct latent interaction graphs as models of user browsing behavior and compare their structural properties, evolution, community structure, and mixing times against those of both active interaction graphs and social graphs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Quarteroni:2013:BKA, author = "Silvia Quarteroni and Marco Brambilla and Stefano Ceri", title = "A bottom-up, knowledge-aware approach to integrating and querying {Web} data services", journal = j-TWEB, volume = "7", number = "4", pages = "19:1--19:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2493536", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As a wealth of data services is becoming available on the Web, building and querying Web applications that effectively integrate their content is increasingly important. However, schema integration and ontology matching with the aim of registering data services often requires a knowledge-intensive, tedious, and error-prone manual process. 
We tackle this issue by presenting a bottom-up, semi-automatic service registration process that refers to an external knowledge base and uses simple text processing techniques in order to minimize and possibly avoid the contribution of domain experts in the annotation of data services. The first by-product of this process is a representation of the domain of data services as an entity-relationship diagram, whose entities are named after concepts of the external knowledge base matching service terminology rather than being manually created to accommodate an application-specific ontology. Second, a three-layer annotation of service semantics (service interfaces, access patterns, service marts) describing how services ``play'' with such domain elements is also automatically constructed at registration time. When evaluated against heterogeneous existing data services and with a synthetic service dataset constructed using Google Fusion Tables, the approach yields good results in terms of data representation accuracy. We subsequently demonstrate that natural language processing methods can be used to decompose and match simple queries to the data services represented in three layers according to the preceding methodology with satisfactory results. We show how semantic annotations are used at query time to convert the user's request into an executable logical query. Globally, our findings show that the proposed registration method is effective in creating a uniform semantic representation of data services, suitable for building Web applications and answering search queries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Leiva:2013:WBB, author = "Luis A. 
Leiva and Roberto Viv{\'o}", title = "{Web} browsing behavior analysis and interactive hypervideo", journal = j-TWEB, volume = "7", number = "4", pages = "20:1--20:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2529995.2529996", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Processing data on any sort of user interaction is well known to be cumbersome and mostly time consuming. In order to assist researchers in easily inspecting fine-grained browsing data, current tools usually display user interactions as mouse cursor tracks, a video-like visualization scheme. However, to date, traditional online video inspection has not explored the full capabilities of hypermedia and interactive techniques. In response to this need, we have developed SMT2$\epsilon$, a Web-based tracking system for analyzing browsing behavior using feature-rich hypervideo visualizations. We compare our system to related work in academia and the industry, showing that ours features unprecedented visualization capabilities. We also show that SMT2$\epsilon$ efficiently captures browsing data and is perceived by users to be both helpful and usable. A series of prediction experiments illustrate that raw cursor data are accessible and can be easily handled, providing evidence that the data can be used to construct and verify research hypotheses. Considering its limitations, it is our hope that SMT2$\epsilon$ will assist researchers, usability practitioners, and other professionals interested in understanding how users browse the Web.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bing:2013:RDS, author = "Lidong Bing and Wai Lam and Tak-Lam Wong", title = "Robust detection of semi-structured web records using a {DOM} structure-knowledge-driven model", journal = j-TWEB, volume = "7", number = "4", pages = "21:1--21:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2508434", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web data record extraction aims at extracting a set of similar object records from a single webpage. These records have similar attributes or fields and are presented with a regular format in a coherent region of the page. To tackle this problem, most existing works analyze the DOM tree of an input page. One major limitation of these methods is that the lack of a global view in detecting data records from an input page results in a myopic decision. Their brute-force searching manner in detecting various types of records degrades the flexibility and robustness. We propose a Structure-Knowledge-Oriented Global Analysis (Skoga) framework which can perform robust detection of different kinds of data records and record regions. The major component of the Skoga framework is a DOM structure-knowledge-driven detection model which can conduct a global analysis on the DOM structure to achieve effective detection. The DOM structure knowledge consists of background knowledge as well as statistical knowledge capturing different characteristics of data records and record regions, as exhibited in the DOM structure. The background knowledge encodes the semantics of labels indicating general constituents of data records and regions. 
The statistical knowledge is represented by some carefully designed features that capture different characteristics of a single node or a node group in the DOM. The feature weights are determined using a development dataset via a parameter estimation algorithm based on a structured output support vector machine. An optimization method based on the divide-and-conquer principle is developed making use of the DOM structure knowledge to quantitatively infer and recognize appropriate records and regions for a page. Extensive experiments have been conducted on four datasets. The experimental results demonstrate that our framework achieves higher accuracy compared with state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liao:2013:VAC, author = "Zhen Liao and Daxin Jiang and Jian Pei and Yalou Huang and Enhong Chen and Huanhuan Cao and Hang Li", title = "A {vlHMM} approach to context-aware search", journal = j-TWEB, volume = "7", number = "4", pages = "22:1--22:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2490255", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Capturing the context of a user's query from the previous queries and clicks in the same session leads to a better understanding of the user's information need. A context-aware approach to document reranking, URL recommendation, and query suggestion may substantially improve users' search experience. In this article, we propose a general approach to context-aware search by learning a variable length hidden Markov model ( vlHMM ) from search sessions extracted from log data. 
While the mathematical model is powerful, the huge amounts of log data present great challenges. We develop several distributed learning techniques to learn a very large vlHMM under the map-reduce framework. Moreover, we construct feature vectors for each state of the vlHMM model to handle users' novel queries not covered by the training data. We test our approach on a raw dataset consisting of 1.9 billion queries, 2.9 billion clicks, and 1.2 billion search sessions before filtering, and evaluate the effectiveness of the vlHMM learned from the real data on three search applications: document reranking, query suggestion, and URL recommendation. The experiment results validate the effectiveness of vlHMM in the applications of document reranking, URL recommendation, and query suggestion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{White:2013:CBD, author = "Ryen W. White and Eric Horvitz", title = "Captions and biases in diagnostic search", journal = j-TWEB, volume = "7", number = "4", pages = "23:1--23:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2486040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "People frequently turn to the Web with the goal of diagnosing medical symptoms. Studies have shown that diagnostic search can often lead to anxiety about the possibility that symptoms are explained by the presence of rare, serious medical disorders, rather than far more common benign syndromes. We study the influence of the appearance of potentially-alarming content, such as severe illnesses or serious treatment options associated with the queried for symptoms, in captions comprising titles, snippets, and URLs. 
We explore whether users are drawn to results with potentially-alarming caption content, and if so, the implications of such attraction for the design of search engines. We specifically study the influence of the content of search result captions shown in response to symptom searches on search-result click-through behavior. We show that users are significantly more likely to examine and click on captions containing potentially-alarming medical terminology such as ``heart attack'' or ``medical emergency'' independent of result rank position and well-known positional biases in users' search examination behaviors. The findings provide insights about the possible effects of displaying implicit correlates of searchers' goals in search-result captions, such as unexpressed concerns and fears. As an illustration of the potential utility of these results, we developed and evaluated an enhanced click prediction model that incorporates potentially-alarming caption features and show that it significantly outperforms models that ignore caption content. Beyond providing additional understanding of the effects of Web content on medical concerns, the methods and findings have implications for search engine design. As part of our discussion on the implications of this research, we propose procedures for generating more representative captions that may be less likely to cause alarm, as well as methods for learning to more appropriately rank search results from logged search behavior, for examples, by also considering the presence of potentially-alarming content in the captions that motivate observed clicks and down-weighting clicks seemingly driven by searchers' health anxieties.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lee:2013:SCA, author = "Jung-Hyun Lee and Jongwoo Ha and Jin-Yong Jung and Sangkeun Lee", title = "Semantic contextual advertising based on the {Open Directory Project}", journal = j-TWEB, volume = "7", number = "4", pages = "24:1--24:??", month = oct, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2529995.2529997", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:21 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Contextual advertising seeks to place relevant textual ads within the content of generic webpages. In this article, we explore a novel semantic approach to contextual advertising. This consists of three tasks: (1) building a well-organized hierarchical taxonomy of topics, (2) developing a robust classifier for effectively finding the topics of pages and ads, and (3) ranking ads based on the topical relevance to pages. First, we heuristically build our own taxonomy of topics from the Open Directory Project (ODP). Second, we investigate how to increase classification accuracy by taking the unique characteristics of the ODP into account. Last, we measure the topical relevance of ads by applying a link analysis technique to the similarity graph carefully derived from our taxonomy. Experiments show that our classification method improves the performance of Ma-F$_1$ by as much as 25.7\% over the baseline classifier. In addition, our ranking method enhances the relevance of ads substantially, up to 10\% in terms of precision at k, compared to a representative strategy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Huang:2013:UEQ, author = "Xiaodi Huang", title = "{UsageQoS}: Estimating the {QoS} of {Web} Services through Online User Communities", journal = j-TWEB, volume = "8", number = "1", pages = "1:1--1:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2532635", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Services are an indispensable component in cloud computing. Web services are particularly important. As an increasing number of Web services provides equivalent functions, one common issue faced by users is the selection of the most appropriate one based on quality. This article presents a conceptual framework that characterizes the quality of Web services, an algorithm that quantifies them, and a system architecture that ranks Web services by using the proposed algorithm. In particular, the algorithm, called UsageQoS that computes the scores of quality of service (QoS) of Web services within a community, makes use of the usage frequencies of Web services. The frequencies are defined as the numbers of times invoked by other services in a given time period. The UsageQoS algorithm is able to optionally take user ratings as its initial input. The proposed approach has been validated by extensively experimenting on several datasets, including two real datasets. The results of the experiments have demonstrated that our approach is capable of estimating QoS parameters of Web services, regardless of whether user ratings are available or not.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Weber:2013:FBW, author = "Ingo Weber and Hye-Young Paik and Boualem Benatallah", title = "Form-Based {Web} Service Composition for Domain Experts", journal = j-TWEB, volume = "8", number = "1", pages = "2:1--2:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2542168", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In many cases, it is not cost effective to automate business processes which affect a small number of people and/or change frequently. We present a novel approach for enabling domain experts to model and deploy such processes from their respective domain as Web service compositions. The approach builds on user-editable service, naming and representing Web services as forms. On this basis, the approach provides a visual composition language with a targeted restriction of control-flow expressivity, process simulation, automated process verification mechanisms, and code generation for executing orchestrations. A Web-based service composition prototype implements this approach, including a WS-BPEL code generator. A small lab user study with 14 participants showed promising results for the usability of the system, even for nontechnical domain experts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Ozcan:2013:SCH, author = "Rifat Ozcan and Ismail Sengor Altingovde and B. 
Barla Cambazoglu and {\"O}zg{\"u}r Ulusoy", title = "Second Chance: a Hybrid Approach for Dynamic Result Caching and Prefetching in Search Engines", journal = j-TWEB, volume = "8", number = "1", pages = "3:1--3:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536777", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web search engines are known to cache the results of previously issued queries. The stored results typically contain the document summaries and some data that is used to construct the final search result page returned to the user. An alternative strategy is to store in the cache only the result document IDs, which take much less space, allowing results of more queries to be cached. These two strategies lead to an interesting trade-off between the hit rate and the average query response latency. In this work, in order to exploit this trade-off, we propose a hybrid result caching strategy where a dynamic result cache is split into two sections: an HTML cache and a docID cache. Moreover, using a realistic cost model, we evaluate the performance of different result prefetching strategies for the proposed hybrid cache and the baseline HTML-only cache. Finally, we propose a machine learning approach to predict singleton queries, which occur only once in the query stream. We show that when the proposed hybrid result caching strategy is coupled with the singleton query predictor, the hit rate is further improved.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sherkat:2013:ETS, author = "Reza Sherkat and Jing Li and Nikos Mamoulis", title = "Efficient Time-Stamped Event Sequence Anonymization", journal = j-TWEB, volume = "8", number = "1", pages = "4:1--4:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2532643", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the rapid growth of applications which generate timestamped sequences (click streams, GPS trajectories, RFID sequences), sequence anonymization has become an important problem, in that should such data be published or shared. Existing trajectory anonymization techniques disregard the importance of time or the sensitivity of events. This article is the first, to our knowledge, thorough study on time-stamped event sequence anonymization. We propose a novel and tunable generalization framework tailored to event sequences. We generalize time stamps using time intervals and events using a taxonomy which models the domain semantics. We consider two scenarios: (i) sharing the data with a single receiver (the SSR setting), where the receiver's background knowledge is confined to a set of time stamps and time generalization suffices, and (ii) sharing the data with colluding receivers (the SCR setting), where time generalization should be combined with event generalization. For both cases, we propose appropriate anonymization methods that prevent both user identification and event prediction. To achieve computational efficiency and scalability, we propose optimization techniques for both cases using a utility-based index, compact summaries, fast to compute bounds for utility, and a novel taxonomy-aware distance function. 
Extensive experiments confirm the effectiveness of our approach compared with state of the art, in terms of information loss, range query distortion, and preserving temporal causality patterns. Furthermore, our experiments demonstrate efficiency and scalability on large-scale real and synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bellido:2013:CFP, author = "Jesus Bellido and Rosa Alarc{\'o}n and Cesare Pautasso", title = "Control-Flow Patterns for Decentralized {RESTful} Service Composition", journal = j-TWEB, volume = "8", number = "1", pages = "5:1--5:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535911", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The REST architectural style has attracted a lot of interest from industry due to the nonfunctional properties it contributes to Web-based solutions. SOAP/WSDL-based services, on the other hand, provide tools and methodologies that allow the design and development of software supporting complex service arrangements, enabling complex business processes which make use of well-known control-flow patterns. It is not clear if and how such patterns should be modeled, considering RESTful Web services that comply with the statelessness, uniform interface and hypermedia constraints. In this article, we analyze a set of fundamental control-flow patterns in the context of stateless compositions of RESTful services. We propose a means of enabling their implementation using the HTTP protocol and discuss the impact of our design choices according to key REST architectural principles. 
We hope to shed new light on the design of basic building blocks for RESTful business processes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chelaru:2013:ADE, author = "Sergiu Chelaru and Ismail Sengor Altingovde and Stefan Siersdorfer and Wolfgang Nejdl", title = "Analyzing, Detecting, and Exploiting Sentiment in {Web} Queries", journal = j-TWEB, volume = "8", number = "1", pages = "6:1--6:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2535525", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Mar 13 08:28:23 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web contains an increasing amount of biased and opinionated documents on politics, products, and polarizing events. In this article, we present an in-depth analysis of Web search queries for controversial topics, focusing on query sentiment. To this end, we conduct extensive user assessments and discriminative term analyses, as well as a sentiment analysis using the SentiWordNet thesaurus, a lexical resource containing sentiment annotations. Furthermore, in order to detect the sentiment expressed in queries, we build different classifiers based on query texts, query result titles, and snippets. We demonstrate the virtue of query sentiment detection in two different use cases. First, we define a query recommendation scenario that employs sentiment detection of results to recommend additional queries for polarized queries issued by search engine users. The second application scenario is controversial topic discovery, where query sentiment classifiers are employed to discover previously unknown topics that trigger both highly positive and negative opinions among the users of a search engine. 
For both use cases, the results of our evaluations on real-world data are promising and show the viability and potential of query sentiment analysis in practical scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Torres:2014:ASB, author = "Sergio Duarte Torres and Ingmar Weber and Djoerd Hiemstra", title = "Analysis of Search and Browsing Behavior of Young Users on the {Web}", journal = j-TWEB, volume = "8", number = "2", pages = "7:1--7:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2555595", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Internet is increasingly used by young children for all kinds of purposes. Nonetheless, there are not many resources especially designed for children on the Internet and most of the content online is designed for grown-up users. This situation is problematic if we consider the large differences between young users and adults since their topic interests, computer skills, and language capabilities evolve rapidly during childhood. There is little research aimed at exploring and measuring the difficulties that children encounter on the Internet when searching for information and browsing for content. In the first part of this work, we employed query logs from a commercial search engine to quantify the difficulties children of different ages encounter on the Internet and to characterize the topics that they search for. We employed query metrics (e.g., the fraction of queries posed in natural language), session metrics (e.g., the fraction of abandoned sessions), and click activity (e.g., the fraction of ad clicks). 
The search logs were also used to retrace stages of child development. Concretely, we looked for changes in interests (e.g., the distribution of topics searched) and language development (e.g., the readability of the content accessed and the vocabulary size). In the second part of this work, we employed toolbar logs from a commercial search engine to characterize the browsing behavior of young users, particularly to understand the activities on the Internet that trigger search. We quantified the proportion of browsing and search activity in the toolbar sessions and we estimated the likelihood of a user to carry out search on the Web vertical and multimedia verticals (i.e., videos and images) given that the previous event is another search event or a browsing event. We observed that these metrics clearly demonstrate an increased level of confusion and unsuccessful search sessions among children. We also found a clear relation between the reading level of the clicked pages and characteristics of the users such as age and educational attainment. In terms of browsing behavior, children were found to start their activities on the Internet with a search engine (instead of directly browsing content) more often than adults. We also observed a significantly larger amount of browsing activity for the case of teenager users. Interestingly we also found that if children visit knowledge-related Web sites (i.e., information-dense pages such as Wikipedia articles), they subsequently do more Web searches than adults. Additionally, children and especially teenagers were found to have a greater tendency to engage in multimedia search, which calls to improve the aggregation of multimedia results into the current search result pages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2014:HIY, author = "Ao-Jan Su and Y. 
Charlie Hu and Aleksandar Kuzmanovic and Cheng-Kok Koh", title = "How to Improve Your Search Engine Ranking: Myths and Reality", journal = j-TWEB, volume = "8", number = "2", pages = "8:1--8:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579990", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Search engines have greatly influenced the way people access information on the Internet, as such engines provide the preferred entry point to billions of pages on the Web. Therefore, highly ranked Web pages generally have higher visibility to people and pushing the ranking higher has become the top priority for Web masters. As a matter of fact, Search Engine Optimization (SEO) has become a sizeable business that attempts to improve their clients' ranking. Still, the lack of ways to validate SEO's methods has created numerous myths and fallacies associated with ranking algorithms. In this article, we focus on two ranking algorithms, Google's and Bing's, and design, implement, and evaluate a ranking system to systematically validate assumptions others have made about these popular ranking algorithms. We demonstrate that linear learning models, coupled with a recursive partitioning ranking scheme, are capable of predicting ranking results with high accuracy. As an example, we manage to correctly predict 7 out of the top 10 pages for 78\% of evaluated keywords. Moreover, for content-only ranking, our system can correctly predict 9 or more pages out of the top 10 ones for 77\% of search terms. 
We show how our ranking system can be used to reveal the relative importance of ranking features in a search engine's ranking function, provide guidelines for SEOs and Web masters to optimize their Web pages, validate or disprove new ranking features, and evaluate search engine ranking results for possible ranking bias.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sirivianos:2014:LSF, author = "Michael Sirivianos and Kyungbaek Kim and Jian Wei Gan and Xiaowei Yang", title = "Leveraging Social Feedback to Verify Online Identity Claims", journal = j-TWEB, volume = "8", number = "2", pages = "9:1--9:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2543711", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Anonymity is one of the main virtues of the Internet, as it protects privacy and enables users to express opinions more freely. However, anonymity hinders the assessment of the veracity of assertions that online users make about their identity attributes, such as age or profession. We propose FaceTrust, a system that uses online social networks to provide lightweight identity credentials while preserving a user's anonymity. FaceTrust employs a ``game with a purpose'' design to elicit the opinions of the friends of a user about the user's self-claimed identity attributes, and uses attack-resistant trust inference to assign veracity scores to identity attribute assertions. FaceTrust provides credentials, which a user can use to corroborate his assertions. We evaluate our proposal using a live Facebook deployment and simulations on a crawled social graph. 
The results show that our veracity scores are strongly correlated with the ground truth, even when dishonest users make up a large fraction of the social network and employ the Sybil attack.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Pugliese:2014:EMM, author = "Andrea Pugliese and Matthias Br{\"o}cheler and V. S. Subrahmanian and Michael Ovelg{\"o}nne", title = "Efficient {MultiView} Maintenance under Insertion in Huge Social Networks", journal = j-TWEB, volume = "8", number = "2", pages = "10:1--10:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2541290", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Applications to monitor various aspects of social networks are becoming increasingly popular. For instance, marketers want to look for semantic patterns relating to the content of tweets and Facebook posts relating to their products. Law enforcement agencies want to track behaviors involving potential criminals on the Internet by looking for certain patterns of behavior. Music companies want to track patterns of spread of illegal music. These applications allow multiple users to specify patterns of interest and monitor them in real time as new data gets added to the Web or to a social network. In this article we develop the concept of social network view servers in which all of these types of applications can be simultaneously monitored. The patterns of interest are expressed as views over an underlying graph or social network database. We show that a given set of views can be compiled in multiple possible ways to take advantage of common substructures and define the concept of an optimal merge. 
Though finding an optimal merge is shown to be NP-hard, we develop the AddView algorithm to find very good merges quickly. We develop a very fast MultiView algorithm that scalably and efficiently maintains multiple subgraph views when insertions are made to the social network database. We show that our algorithm is correct, study its complexity, and experimentally demonstrate that our algorithm can scalably handle updates to hundreds of views on 6 real-world social network databases with up to 540M edges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bislimovska:2014:TCB, author = "Bojana Bislimovska and Alessandro Bozzon and Marco Brambilla and Piero Fraternali", title = "Textual and Content-Based Search in Repositories of {Web} Application Models", journal = j-TWEB, volume = "8", number = "2", pages = "11:1--11:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579991", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Model-driven engineering relies on collections of models, which are the primary artifacts for software development. To enable knowledge sharing and reuse, models need to be managed within repositories, where they can be retrieved upon users' queries. This article examines two different techniques for indexing and searching model repositories, with a focus on Web development projects encoded in a domain-specific language. Keyword-based and content-based search (also known as query-by-example) are contrasted with respect to the architecture of the system, the processing of models and queries, and the way in which metamodel knowledge can be exploited to improve search. 
A thorough experimental evaluation is conducted to examine what parameter configurations lead to better accuracy and to offer an insight in what queries are addressed best by each system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bellogin:2014:NSW, author = "Alejandro Bellog{\'\i}n and Pablo Castells and Iv{\'a}n Cantador", title = "Neighbor Selection and Weighting in User-Based Collaborative Filtering: a Performance Prediction Approach", journal = j-TWEB, volume = "8", number = "2", pages = "12:1--12:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2579993", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 1 05:42:19 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User-based collaborative filtering systems suggest interesting items to a user relying on similar-minded people called neighbors. The selection and weighting of these neighbors characterize the different recommendation approaches. While standard strategies perform a neighbor selection based on user similarities, trust-aware recommendation algorithms rely on other aspects indicative of user trust and reliability. In this article we restate the trust-aware recommendation problem, generalizing it in terms of performance prediction techniques, whose goal is to predict the performance of an information retrieval system in response to a particular query. We investigate how to adopt the preceding generalization to define a unified framework where we conduct an objective analysis of the effectiveness (predictive power) of neighbor scoring functions. 
The proposed framework enables discriminating whether recommendation performance improvements are caused by the used neighbor scoring functions or by the ways these functions are used in the recommendation computation. We evaluated our approach with several state-of-the-art and novel neighbor scoring functions on three publicly available datasets. By empirically comparing four neighbor quality metrics and thirteen performance predictors, we found strong predictive power for some of the predictors with respect to certain metrics. This result was then validated by checking the final performance of recommendation strategies where predictors are used for selecting and/or weighting user neighbors. As a result, we have found that, by measuring the predictive power of neighbor performance predictors, we are able to anticipate which predictors are going to perform better in neighbor-scoring-powered versions of a user-based collaborative filtering algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Qian:2014:FTD, author = "Yi Qian and Sibel Adali", title = "Foundations of Trust and Distrust in Networks: Extended Structural Balance Theory", journal = j-TWEB, volume = "8", number = "3", pages = "13:1--13:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628438", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Modeling trust in very large social networks is a hard problem due to the highly noisy nature of these networks that span trust relationships from many different contexts, based on judgments of reliability, dependability, and competence. Furthermore, relationships in these networks vary in their level of strength. 
In this article, we introduce a novel extension of structural balance theory as a foundational theory of trust and distrust in networks. Our theory preserves the distinctions between trust and distrust as suggested in the literature, but also incorporates the notion of relationship strength that can be expressed as either discrete categorical values, as pairwise comparisons, or as metric distances. Our model is novel, has sound social and psychological basis, and captures the classical balance theory as a special case. We then propose a convergence model, describing how an imbalanced network evolves towards new balance, and formulate the convergence problem of a social network as a Metric Multidimensional Scaling (MDS) optimization problem. Finally, we show how the convergence model can be used to predict edge signs in social networks and justify our theory through extensive experiments on real datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Soi:2014:CDC, author = "Stefano Soi and Florian Daniel and Fabio Casati", title = "Conceptual Development of Custom, Domain-Specific Mashup Platforms", journal = j-TWEB, volume = "8", number = "3", pages = "14:1--14:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628439", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Despite the common claim by mashup platforms that they enable end-users to develop their own software, in practice end-users still don't develop their own mashups, as the highly technical or inexistent [sic] user bases of today's mashup platforms testify. 
The key shortcoming of current platforms is their general-purpose nature, that privileges expressive power over intuitiveness. In our prior work, we have demonstrated that a domain-specific mashup approach, which privileges intuitiveness over expressive power, has much more potential to enable end-user development (EUD). The problem is that developing mashup platforms---domain-specific or not---is complex and time consuming. In addition, domain-specific mashup platforms by their very nature target only a small user basis, that is, the experts of the target domain, which makes their development not sustainable if it is not adequately supported and automated. With this article, we aim to make the development of custom, domain-specific mashup platforms cost-effective. We describe a mashup tool development kit (MDK) that is able to automatically generate a mashup platform (comprising custom mashup and component description languages and design-time and runtime environments) from a conceptual design and to provision it as a service. We equip the kit with a dedicated development methodology and demonstrate the applicability and viability of the approach with the help of two case studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2014:PBT, author = "Xianchao Zhang and You Wang and Nan Mou and Wenxin Liang", title = "Propagating Both Trust and Distrust with Target Differentiation for Combating Link-Based {Web} Spam", journal = j-TWEB, volume = "8", number = "3", pages = "15:1--15:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628440", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Semi-automatic anti-spam algorithms propagate either trust through links from a good seed set (e.g., TrustRank) or distrust through inverse links from a bad seed set (e.g., Anti-TrustRank) to the entire Web. These kinds of algorithms have shown their powers in combating link-based Web spam since they integrate both human judgement and machine intelligence. Nevertheless, there is still much space for improvement. One issue of most existing trust/distrust propagation algorithms is that only trust or distrust is propagated and only a good seed set or a bad seed set is used. According to Wu et al. [2006a], a combined usage of both trust and distrust propagation can lead to better results, and an effective framework is needed to realize this insight. Another more serious issue of existing algorithms is that trust or distrust is propagated in nondifferential ways, that is, a page propagates its trust or distrust score uniformly to its neighbors, without considering whether each neighbor should be trusted or distrusted. Such kinds of blind propagating schemes are inconsistent with the original intention of trust/distrust propagation. However, it seems impossible to implement differential propagation if only trust or distrust is propagated. 
In this article, we take the view that each Web page has both a trustworthy side and an untrustworthy side, and we thusly assign two scores to each Web page: T-Rank, scoring the trustworthiness of the page, and D-Rank, scoring the untrustworthiness of the page. We then propose an integrated framework that propagates both trust and distrust. In the framework, the propagation of T-Rank/D-Rank is penalized by the target's current D-Rank/T-Rank. In other words, the propagation of T-Rank/D-Rank is decided by the target's current (generalized) probability of being trustworthy/untrustworthy; thus a page propagates more trust/distrust to a trustworthy/untrustworthy neighbor than to an untrustworthy/trustworthy neighbor. In this way, propagating both trust and distrust with target differentiation is implemented. We use T-Rank scores to realize spam demotion and D-Rank scores to accomplish spam detection. The proposed Trust-DistrustRank (TDR) algorithm regresses to TrustRank and Anti-TrustRank when the penalty factor is set to 1 and 0, respectively. Thus TDR could be seen as a combinatorial generalization of both TrustRank and Anti-TrustRank. TDR not only makes full use of both trust and distrust propagation, but also overcomes the disadvantages of both TrustRank and Anti-TrustRank. Experimental results on benchmark datasets show that TDR outperforms other semi-automatic anti-spam algorithms for both spam demotion and spam detection tasks under various criteria.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Margaritis:2014:ITI, author = "Giorgos Margaritis and Stergios V. 
Anastasiadis", title = "Incremental Text Indexing for Fast Disk-Based Search", journal = j-TWEB, volume = "8", number = "3", pages = "16:1--16:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560800", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Real-time search requires to incrementally ingest content updates and almost immediately make them searchable while serving search queries at low latency. This is currently feasible for datasets of moderate size by fully maintaining the index in the main memory of multiple machines. Instead, disk-based methods for incremental index maintenance substantially increase search latency with the index fragmented across multiple disk locations. For the support of fast search over disk-based storage, we take a fresh look at incremental text indexing in the context of current architectural features. We introduce a greedy method called Selective Range Flush (SRF) to contiguously organize the index over disk blocks and dynamically update it at low cost. We show that SRF requires substantial experimental effort to tune specific parameters for performance efficiency. Subsequently, we propose the Unified Range Flush (URF) method, which is conceptually simpler than SRF, achieves similar or better performance with fewer parameters and less tuning, and is amenable to I/O complexity analysis. We implement interesting variations of the two methods in the Proteus prototype search engine that we developed and do extensive experiments with three different Web datasets of size up to 1TB. Across different systems, we show that our methods offer search latency that matches or reduces up to half the lowest achieved by existing disk-based methods. 
In comparison to an existing method of comparable search latency on the same system, our methods reduce by a factor of 2.0--2.4 the I/O part of build time and by 21--24\% the total build time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Siersdorfer:2014:AMC, author = "Stefan Siersdorfer and Sergiu Chelaru and Jose {San Pedro} and Ismail Sengor Altingovde and Wolfgang Nejdl", title = "Analyzing and Mining Comments and Comment Ratings on the Social {Web}", journal = j-TWEB, volume = "8", number = "3", pages = "17:1--17:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2628441", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "An analysis of the social video sharing platform YouTube and the news aggregator Yahoo! News reveals the presence of vast amounts of community feedback through comments for published videos and news stories, as well as through metaratings for these comments. This article presents an in-depth study of commenting and comment rating behavior on a sample of more than 10 million user comments on YouTube and Yahoo! News. In this study, comment ratings are considered first-class citizens. Their dependencies with textual content, thread structure of comments, and associated content (e.g., videos and their metadata) are analyzed to obtain a comprehensive understanding of the community commenting behavior. Furthermore, this article explores the applicability of machine learning and data mining to detect acceptance of comments by the community, comments likely to trigger discussions, controversial and polarizing content, and users exhibiting offensive commenting behavior. 
Results from this study have potential application in guiding the design of community-oriented online discussion platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Casteleyn:2014:TYR, author = "Sven Casteleyn and Irene Garrig{\'o}s and Jose-Norberto Maz{\'o}n", title = "Ten Years of {Rich Internet Applications}: a Systematic Mapping Study, and Beyond", journal = j-TWEB, volume = "8", number = "3", pages = "18:1--18:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2626369", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The term Rich Internet Applications (RIAs) is generally associated with Web applications that provide the features and functionality of traditional desktop applications. Ten years after the introduction of the term, an ample amount of research has been carried out to study various aspects of RIAs. It has thus become essential to summarize this research and provide an adequate overview. OBJECTIVE. The objective of our study is to assemble, classify, and analyze all RIA research performed in the scientific community, thus providing a consolidated overview thereof, and to identify well-established topics, trends, and open research issues. Additionally, we provide a qualitative discussion of the most interesting findings. This work therefore serves as a reference work for beginning and established RIA researchers alike, as well as for industrial actors that need an introduction in the field, or seek pointers to (a specific subset of) the state-of-the-art. METHOD. 
A systematic mapping study is performed in order to identify all RIA-related publications, define a classification scheme, and categorize, analyze, and discuss the identified research according to it. RESULTS. Our source identification phase resulted in 133 relevant, peer-reviewed publications, published between 2002 and 2011 in a wide variety of venues. They were subsequently classified according to four facets: development activity, research topic, contribution type, and research type. Pie, stacked bar, and bubble charts were used to depict and analyze the results. A deeper analysis is provided for the most interesting and/or remarkable results. CONCLUSION. Analysis of the results shows that, although the RIA term was coined in 2002, the first RIA-related research appeared in 2004. From 2007 there was a significant increase in research activity, peaking in 2009 and decreasing to pre-2009 levels afterwards. All development phases are covered in the identified research, with emphasis on ``design'' (33\%) and ``implementation'' (29\%). The majority of research proposes a ``method'' (44\%), followed by ``model'' (22\%), ``methodology'' (18\%), and ``tools'' (16\%); no publications in the category ``metrics'' were found. The preponderant research topic is ``models, methods and methodologies'' (23\%) and, to a lesser extent, ``usability and accessibility'' and ``user interface'' (11\% each). On the other hand, the topic ``localization, internationalization and multilinguality'' received no attention at all, and topics such as ``deep Web'' (under 1\%), ``business processing'', ``usage analysis'', ``data management'', ``quality and metrics'' (all under 2\%), ``semantics'', and ``performance'' (slightly above 2\%) received very little attention. 
Finally, there is a large majority of ``solution proposals'' (66\%), few ``evaluation research'' (14\%), and even fewer ``validation'' (6\%), although the latter have been increasing in recent years.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dincturk:2014:MBA, author = "Mustafa Emre Dincturk and Guy-Vincent Jourdan and Gregor V. Bochmann and Iosif Viorel Onut", title = "A Model-Based Approach for Crawling {Rich Internet Applications}", journal = j-TWEB, volume = "8", number = "3", pages = "19:1--19:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2626371", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 2 18:17:48 MDT 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "New Web technologies, like AJAX, result in more responsive and interactive Web applications, sometimes called Rich Internet Applications (RIAs). Crawling techniques developed for traditional Web applications are not sufficient for crawling RIAs. The inability to crawl RIAs is a problem that needs to be addressed for at least making RIAs searchable and testable. We present a new methodology, called ``model-based crawling'', that can be used as a basis to design efficient crawling strategies for RIAs. We illustrate model-based crawling with a sample strategy, called the ``hypercube strategy''. The performances of our model-based crawling strategies are compared against existing standard crawling strategies, including breadth-first, depth-first, and a greedy strategy. Experimental results show that our model-based crawling approach is significantly more efficient than these standard strategies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dragut:2014:MQR, author = "Eduard C. Dragut and Bhaskar Dasgupta and Brian P. Beirne and Ali Neyestani and Badr Atassi and Clement Yu and Weiyi Meng", title = "Merging Query Results From Local Search Engines for Georeferenced Objects", journal = j-TWEB, volume = "8", number = "4", pages = "20:1--20:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2656344", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The emergence of numerous online sources about local services presents a need for more automatic yet accurate data integration techniques. Local services are georeferenced objects and can be queried by their locations on a map, for instance, neighborhoods. Typical local service queries (e.g., ``French Restaurant in The Loop'') include not only information about ``what'' (``French Restaurant'') a user is searching for (such as cuisine) but also ``where'' information, such as neighborhood (``The Loop''). In this article, we address three key problems: query translation, result merging and ranking. Most local search engines provide a (hierarchical) organization of (large) cities into neighborhoods. A neighborhood in one local search engine may correspond to sets of neighborhoods in other local search engines. These make the query translation challenging. To provide an integrated access to the query results returned by the local search engines, we need to combine the results into a single list of results. Our contributions include: (1) An integration algorithm for neighborhoods. (2) A very effective business listing resolution algorithm. 
(3) A ranking algorithm that takes into consideration the user criteria, user ratings and rankings. We have created a prototype system, Yumi, over local search engines in the restaurant domain. The restaurant domain is a representative case study for the local services. We conducted a comprehensive experimental study to evaluate Yumi. A prototype version of Yumi is available online.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chen:2014:CCU, author = "Xihui Chen and Jun Pang and Ran Xue", title = "Constructing and Comparing User Mobility Profiles", journal = j-TWEB, volume = "8", number = "4", pages = "21:1--21:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2637483", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Nowadays, the accumulation of people's whereabouts due to location-based applications has made it possible to construct their mobility profiles. This access to users' mobility profiles subsequently brings benefits back to location-based applications. For instance, in on-line social networks, friends can be recommended not only based on the similarity between their registered information, for instance, hobbies and professions but also referring to the similarity between their mobility profiles. In this article, we propose a new approach to construct and compare users' mobility profiles. First, we improve and apply frequent sequential pattern mining technologies to extract the sequences of places that a user frequently visits and use them to model his mobility profile. Second, we present a new method to calculate the similarity between two users using their mobility profiles. 
More specifically, we identify the weaknesses of a similarity metric in the literature, and propose a new one which not only fixes the weaknesses but also provides more precise and effective similarity estimation. Third, we consider the semantics of spatio-temporal information contained in user mobility profiles and add them into the calculation of user similarity. It enables us to measure users' similarity from different perspectives. Two specific types of semantics are explored in this article: location semantics and temporal semantics. Last, we validate our approach by applying it to two real-life datasets collected by Microsoft Research Asia and Yonsei University, respectively. The results show that our approach outperforms the existing works from several aspects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vural:2014:SFW, author = "A. Gural Vural and B. Barla Cambazoglu and Pinar Karagoz", title = "Sentiment-Focused {Web} Crawling", journal = j-TWEB, volume = "8", number = "4", pages = "22:1--22:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2644821", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Sentiments and opinions expressed in Web pages towards objects, entities, and products constitute an important portion of the textual content available in the Web. In the last decade, the analysis of such content has gained importance due to its high potential for monetization. Despite the vast interest in sentiment analysis, somewhat surprisingly, the discovery of sentimental or opinionated Web content is mostly ignored. 
This work aims to fill this gap and addresses the problem of quickly discovering and fetching the sentimental content present in the Web. To this end, we design a sentiment-focused Web crawling framework. In particular, we propose different sentiment-focused Web crawling strategies that prioritize discovered URLs based on their predicted sentiment scores. Through simulations, these strategies are shown to achieve considerable performance improvement over general-purpose Web crawling strategies in discovery of sentimental Web content.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kyusakov:2014:EFE, author = "Rumen Kyusakov and Pablo Pu{\~n}al Pereira and Jens Eliasson and Jerker Delsing", title = "{EXIP}: a Framework for Embedded {Web} Development", journal = j-TWEB, volume = "8", number = "4", pages = "23:1--23:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2665068", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Developing and deploying Web applications on networked embedded devices is often seen as a way to reduce the development cost and time to market for new target platforms. However, the size of the messages and the processing requirements of today's Web protocols, such as HTTP and XML, are challenging for the most resource-constrained class of devices that could also benefit from Web connectivity. New Web protocols using binary representations have been proposed for addressing this issue. Constrained Application Protocol (CoAP) reduces the bandwidth and processing requirements compared to HTTP while preserving the core concepts of the Web architecture. 
Similarly, Efficient XML Interchange (EXI) format has been standardized for reducing the size and processing time for XML structured information. Nevertheless, the adoption of these technologies is lagging behind due to lack of support from Web browsers and current Web development toolkits. Motivated by these problems, this article presents the design and implementation techniques for the EXIP framework for embedded Web development. The framework consists of a highly efficient EXI processor, a tool for EXI data binding based on templates, and a CoAP/EXI/XHTML Web page engine. A prototype implementation of the EXI processor is herein presented and evaluated. It can be applied to Web browsers or thin server platforms using XHTML and Web services for supporting human-machine interactions in the Internet of Things. This article contains four major results: (1) theoretical and practical evaluation of the use of binary protocols for embedded Web programming; (2) a novel method for generation of EXI grammars based on XML Schema definitions; (3) an algorithm for grammar concatenation that produces normalized EXI grammars directly, and hence reduces the number of iterations during grammar generation; (4) an algorithm for efficient representation of possible deviations from the XML schema.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Thomas:2014:UID, author = "Paul Thomas", title = "Using Interaction Data to Explain Difficulty Navigating Online", journal = j-TWEB, volume = "8", number = "4", pages = "24:1--24:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2656343", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A user's behaviour when browsing a Web site contains clues to that user's experience. It is possible to record some of these behaviours automatically, and extract signals that indicate a user is having trouble finding information. This allows for Web site analytics based on user experiences, not just page impressions. A series of experiments identified user browsing behaviours-such as time taken and amount of scrolling up a page-which predict navigation difficulty and which can be recorded with minimal or no changes to existing sites or browsers. In turn, patterns of page views correlate with these signals and these patterns can help Web authors understand where and why their sites are hard to navigate. A new software tool, ``LATTE,'' automates this analysis and makes it available to Web authors in the context of the site itself.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{White:2014:CBO, author = "Ryen W. 
White and Ahmed Hassan", title = "Content Bias in Online Health Search", journal = j-TWEB, volume = "8", number = "4", pages = "25:1--25:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2663355", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Nov 6 16:08:07 MST 2014", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Search engines help people answer consequential questions. Biases in retrieved and indexed content (e.g., skew toward erroneous outcomes that represent deviations from reality), coupled with searchers' biases in how they examine and interpret search results, can lead people to incorrect answers. In this article, we seek to better understand biases in search and retrieval, and in particular those affecting the accuracy of content in search results, including the search engine index, features used for ranking, and the formulation of search queries. Focusing on the important domain of online health search, this research broadens previous work on biases in search to examine the role of search systems in contributing to biases. To assess bias, we focus on questions about medical interventions and employ reliable ground truth data from authoritative medical sources. In the course of our study, we utilize large-scale log analysis using data from a popular Web search engine, deep probes of result lists on that search engine, and crowdsourced human judgments of search result captions and landing pages. Our findings reveal bias in results, amplifying searchers' existing biases that appear evident in their search activity. We also highlight significant bias in indexed content and show that specific ranking signals and specific query terms support bias. Both of these can degrade result accuracy and increase skewness in search results. 
Our analysis has implications for bias mitigation strategies in online search systems, and we offer recommendations for search providers based on our findings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "25", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fletcher:2015:EPN, author = "Kenneth K. Fletcher and Xiaoqing F. Liu and Mingdong Tang", title = "Elastic Personalized Nonfunctional Attribute Preference and Trade-off Based Service Selection", journal = j-TWEB, volume = "9", number = "1", pages = "1:1--1:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697389", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "For service users to get the best service that meet their requirements, they prefer to personalize their nonfunctional attributes, such as reliability and price. However, the personalization makes it challenging for service providers to completely meet users' preferences, because they have to deal with conflicting nonfunctional attributes when selecting services for users. With this in mind, users may sometimes want to explicitly specify their trade-offs among nonfunctional attributes to make their preferences known to service providers. In this article, we present a novel service selection method based on fuzzy logic that considers users' personalized preferences and their trade-offs on nonfunctional attributes during service selection. The method allows users to represent their elastic nonfunctional requirements and associated importance using linguistic terms to specify their personalized trade-off strategies. 
We present examples showing how the service selection framework is used and a prototype with real-world airline services to evaluate the proposed framework's application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2015:REA, author = "Haibin Zhang and Yan Wang and Xiuzhen Zhang and Ee-Peng Lim", title = "{ReputationPro}: The Efficient Approaches to Contextual Transaction Trust Computation in {E}-Commerce Environments", journal = j-TWEB, volume = "9", number = "1", pages = "2:1--2:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697390", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In e-commerce environments, the trustworthiness of a seller is utterly important to potential buyers, especially when a seller is not known to them. Most existing trust evaluation models compute a single value to reflect the general trustworthiness of a seller without taking any transaction context information into account. With such a result as the indication of reputation, a buyer may be easily deceived by a malicious seller in a transaction where the notorious value imbalance problem is involved---in other words, a malicious seller accumulates a high-level reputation by selling cheap products and then deceives buyers by inducing them to purchase more expensive products. In this article, we first present a trust vector consisting of three values for contextual transaction trust (CTT). In the computation of CTT values, three identified important context dimensions, including Product Category, Transaction Amount, and Transaction Time, are taken into account. 
In the meantime, the computation of each CTT value is based on both past transactions and the forthcoming transaction. In particular, with different parameters specified by a buyer regarding context dimensions, different sets of CTT values can be calculated. As a result, all of these trust values can outline the reputation profile of a seller that indicates the dynamic trustworthiness of a seller in different products, product categories, price ranges, time periods, and any necessary combination of them. We name this new model ReputationPro. Nevertheless, in ReputationPro, the computation of reputation profile requires new data structures for appropriately indexing the precomputation of aggregates over large-scale ratings and transaction data in three context dimensions, as well as novel algorithms for promptly answering buyers' CTT queries. In addition, storing precomputed aggregation results consumes a large volume of space, particularly for a system with millions of sellers. Therefore, reducing storage space for aggregation results is also a great demand. To solve these challenging problems, we first propose a new index scheme CMK-tree by extending the two-dimensional K-D-B-tree that indexes spatial data to support efficient computation of CTT values. Then, we further extend the CMK-tree and propose a CMK-tree$^{RS}$ approach to reducing the storage space allocated to each seller. The two approaches are not only applicable to three context dimensions that are either linear or hierarchical but also take into account the characteristics of the transaction-time model-that is, transaction data is inserted in chronological order. Moreover, the proposed data structures can index each specific product traded in a time period to compute the trustworthiness of a seller in selling a product. Finally, the experimental results illustrate that the CMK-tree is superior in efficiency of computing CTT values to all three existing approaches in the literature. 
In particular, while answering a buyer's CTT queries for each brand-based product category, the CMK-tree has almost linear query performance. In addition, with significantly reduced storage space, the CMK-tree$^{RS}$ approach can further improve the efficiency in computing CTT values. Therefore, our proposed ReputationPro model is scalable to large-scale e-commerce Web sites in terms of efficiency and storage space consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cai:2015:ALW, author = "Wenbin Cai and Muhan Zhang and Ya Zhang", title = "Active Learning for {Web} Search Ranking via Noise Injection", journal = j-TWEB, volume = "9", number = "1", pages = "3:1--3:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697391", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Learning to rank has become increasingly important for many information retrieval applications. To reduce the labeling cost at training data preparation, many active sampling algorithms have been proposed. In this article, we propose a novel active learning-for-ranking strategy called ranking-based sensitivity sampling (RSS), which is tailored for Gradient Boosting Decision Tree (GBDT), a machine-learned ranking method widely used in practice by major commercial search engines for ranking. We leverage the property of GBDT that samples close to the decision boundary tend to be sensitive to perturbations and design the active learning strategy accordingly. 
We further theoretically analyze the proposed strategy by exploring the connection between the sensitivity used for sample selection and model regularization to provide a potentially theoretical guarantee w.r.t. the generalization capability. Considering that the performance metrics of ranking overweight the top-ranked items, item rank is incorporated into the selection function. In addition, we generalize the proposed technique to several other base learners to show its potential applicability in a wide variety of applications. Substantial experimental results on both the benchmark dataset and a real-world dataset have demonstrated that our proposed active learning strategy is highly effective in selecting the most informative examples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gill:2015:CWC, author = "Phillipa Gill and Masashi Crete-Nishihata and Jakub Dalek and Sharon Goldberg and Adam Senft and Greg Wiseman", title = "Characterizing {Web} Censorship Worldwide: Another Look at the {OpenNet} Initiative Data", journal = j-TWEB, volume = "9", number = "1", pages = "4:1--4:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700339", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this study, we take another look at 5 years of web censorship data gathered by the OpenNet Initiative in 77 countries using user-based testing with locally relevant content. Prior to our work, this data had been analyzed with little automation, focusing on what content had been blocked, rather than how blocking was carried out. 
In this study, we use more rigorous automation to obtain a longitudinal, global view of the technical means used for web censorship. We also identify blocking that had been missed in prior analyses. Our results point to considerable variability in the technologies used for web censorship, across countries, time, and types of content, and even across ISPs in the same country. In addition to characterizing web censorship in countries that, thus far, have eluded technical analysis, we also discuss the implications of our observations on the design of future network measurement platforms and circumvention technologies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fionda:2015:NFL, author = "Valeria Fionda and Giuseppe Pirr{\`o} and Claudio Gutierrez", title = "{NautiLOD}: a Formal Language for the {Web of Data} Graph", journal = j-TWEB, volume = "9", number = "1", pages = "5:1--5:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697393", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 23 17:41:52 MST 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web of Linked Data is a huge graph of distributed and interlinked datasources fueled by structured information. This new environment calls for formal languages and tools to automatize navigation across datasources (nodes in such graph) and enable semantic-aware and Web-scale search mechanisms. In this article we introduce a declarative navigational language for the Web of Linked Data graph called NautiLOD. NautiLOD enables one to specify datasources via the intertwining of navigation and querying capabilities. 
It also features a mechanism to specify actions (e.g., send notification messages) that obtain their parameters from datasources reached during the navigation. We provide a formalization of the NautiLOD semantics, which captures both nodes and fragments of the Web of Linked Data. We present algorithms to implement such semantics and study their computational complexity. We discuss an implementation of the features of NautiLOD in a tool called swget, which exploits current Web technologies and protocols. We report on the evaluation of swget and its comparison with related work. Finally, we show the usefulness of capturing Web fragments by providing examples in different knowledge domains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anonymous:2015:E, author = "Anonymous", title = "Editorial", journal = j-TWEB, volume = "9", number = "2", pages = "6:1--6:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2755995", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tranquillini:2015:MEI, author = "Stefano Tranquillini and Florian Daniel and Pavel Kucherbaev and Fabio Casati", title = "Modeling, Enacting, and Integrating Custom Crowdsourcing Processes", journal = j-TWEB, volume = "9", number = "2", pages = "7:1--7:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746353", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Crowdsourcing (CS) is the outsourcing of a unit of work to a crowd of people via an open call for contributions. Thanks to the availability of online CS platforms, such as Amazon Mechanical Turk or CrowdFlower, the practice has experienced a tremendous growth over the past few years and demonstrated its viability in a variety of fields, such as data collection and analysis or human computation. Yet it is also increasingly struggling with the inherent limitations of these platforms: each platform has its own logic of how to crowdsource work (e.g., marketplace or contest), there is only very little support for structured work (work that requires the coordination of multiple tasks), and it is hard to integrate crowdsourced tasks into state-of-the-art business process management (BPM) or information systems. We attack these three shortcomings by (1) developing a flexible CS platform (we call it Crowd Computer, or CC) that allows one to program custom CS logics for individual and structured tasks, (2) devising a BPMN--based modeling language that allows one to program CC intuitively, (3) equipping the language with a dedicated visual editor, and (4) implementing CC on top of standard BPM technology that can easily be integrated into existing software and processes. 
We demonstrate the effectiveness of the approach with a case study on the crowd-based mining of mashup model patterns.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Diaz:2015:AWR, author = "Oscar D{\'\i}az and Crist{\'o}bal Arellano", title = "The Augmented {Web}: Rationales, Opportunities, and Challenges on Browser-Side Transcoding", journal = j-TWEB, volume = "9", number = "2", pages = "8:1--8:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2735633", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Today's web personalization technologies use approaches like user categorization, configuration, and customization but do not fully support individualized requirements. As a significant portion of our social and working interactions are migrating to the web, we can expect an increase in these kinds of minority requirements. Browser-side transcoding holds the promise of facilitating this aim by opening personalization to third parties through web augmentation (WA), realized in terms of extensions and userscripts. WA is to the web what augmented reality is to the physical world: to layer relevant content/layout/navigation over the existing web to improve the user experience. From this perspective, WA is not as powerful as web personalization since its scope is limited to the surface of the web. However, it permits this surface to be tuned by developers other than the sites' webmasters. This opens up the web to third parties who might come up with imaginative ways of adapting the web surface for their own purposes. Its success is backed up by millions of downloads. 
This work looks at this phenomenon, delving into the ``what,'' the ``why,'' and the ``what for'' of WA, and surveys the challenges ahead for WA to thrive. To this end, we appraise the most downloaded 45 WA extensions for Mozilla Firefox and Google Chrome as well as conduct a systematic literature review to identify what quality issues received the most attention in the literature. The aim is to raise awareness about WA as a key enabler of the personal web and point out research directions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sun:2015:ITB, author = "Chang-Ai Sun and Xin Zhang and Yan Shang and Marco Aiello", title = "Integrating Transactions into {BPEL} Service Compositions: an Aspect-Based Approach", journal = j-TWEB, volume = "9", number = "2", pages = "9:1--9:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2757288", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The concept of software as a service has been increasingly adopted to develop distributed applications. Ensuring the reliability of loosely coupled compositions is a challenging task because of the open, dynamic, and independent nature of composable services; this is especially true when the execution of a service-based process relies on independent but correlated services. Transactions are the prototypical case of compositions spanning across multiple services and needing properties to be valid throughout the whole execution. Although transaction protocols and service composition languages have been proposed in the past decade, a true viable and effective solution is still missing. 
In this article, we propose a systematic aspect-based approach to integrating transactions into service compositions, taking into account the well-known protocols: Web Service Transaction and Business Process Execution Language (BPEL). In our approach, transaction policies are first defined as a set of aspects. They are then converted to standard BPEL elements. Finally, these transaction-related elements and the original BPEL process are weaved together, resulting in a transactional executable BPEL process. At runtime, transaction management is the responsibility of a middleware, which implements the coordination framework and transaction protocols followed by the transactional BPEL process and transaction-aware Web services. To automate the proposed approach, we developed a supporting platform called Salan to aid the tasks of defining, validating, and weaving aspect-based transaction policies, and of deploying the transactional BPEL processes. By means of a case study, we demonstrate the proposed approach and evaluate the performance of the supporting platform. Experimental results show that this approach is effective in producing reliable business processes while reducing the need for direct human involvement.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kwasnikowska:2015:FAO, author = "Natalia Kwasnikowska and Luc Moreau and Jan {Van Den Bussche}", title = "A Formal Account of the Open Provenance Model", journal = j-TWEB, volume = "9", number = "2", pages = "10:1--10:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2734116", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 27 10:18:18 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "On the Web, where resources such as documents and data are published, shared, transformed, and republished, provenance is a crucial piece of metadata that would allow users to place their trust in the resources they access. The open provenance model (OPM) is a community data model for provenance that is designed to facilitate the meaningful interchange of provenance information between systems. Underpinning OPM is a notion of directed graph, where nodes represent data products and processes involved in past computations and edges represent dependencies between them; it is complemented by graphical inference rules allowing new dependencies to be derived. Until now, however, the OPM model was a purely syntactical endeavor. The present article extends OPM graphs with an explicit distinction between precise and imprecise edges. Then a formal semantics for the thus enriched OPM graphs is proposed, by viewing OPM graphs as temporal theories on the temporal events represented in the graph. The original OPM inference rules are scrutinized in view of the semantics and found to be sound but incomplete. An extended set of graphical rules is provided and proved to be complete for inference. 
The article concludes with applications of the formal semantics to inferencing in OPM graphs, operators on OPM graphs, and a formal notion of refinement among OPM graphs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cappiello:2015:UCA, author = "Cinzia Cappiello and Maristella Matera and Matteo Picozzi", title = "A {UI}-Centric Approach for the End-User Development of Multidevice Mashups", journal = j-TWEB, volume = "9", number = "3", pages = "11:1--11:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2735632", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent years, models, composition paradigms, and tools for mashup development have been proposed to support the integration of information sources, services and APIs available on the Web. The challenge is to provide a gate to a ``programmable Web,'' where end users are allowed to construct easily composite applications that merge content and functions so as to satisfy the long tail of their specific needs. The approaches proposed so far do not fully accommodate this vision. This article, therefore, proposes a mashup development framework that is oriented toward the End-User Development. Given the fundamental role of user interfaces (UIs) as a medium easily understandable by the end users, the proposed approach is characterized by UI-centric models able to support a WYSIWYG (What You See Is What You Get) specification of data integration and service orchestration. 
It, therefore, contributes to the definition of adequate abstractions that, by hiding the technology and implementation complexity, can be adopted by the end users in a kind of ``democratic'' paradigm for mashup development. This article also shows how model-to-code generative techniques translate models into application schemas, which in turn guide the dynamic instantiation of the composite applications at runtime. This is achieved through lightweight execution environments that can be deployed on the Web and on mobile devices to support the pervasive use of the created applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zafar:2015:SCO, author = "Muhammad Bilal Zafar and Parantapa Bhattacharya and Niloy Ganguly and Krishna P. Gummadi and Saptarshi Ghosh", title = "Sampling Content from Online Social Networks: Comparing Random vs. Expert Sampling of the {Twitter} Stream", journal = j-TWEB, volume = "9", number = "3", pages = "12:1--12:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2743023", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Analysis of content streams gathered from social networking sites such as Twitter has several applications ranging from content search and recommendation, news detection to business analytics. However, processing large amounts of data generated on these sites in real-time poses a difficult challenge. To cope with the data deluge, analytics companies and researchers are increasingly resorting to sampling. In this article, we investigate the crucial question of how to sample content streams generated by users in online social networks. 
The traditional method is to randomly sample all the data. For example, most studies using Twitter data today rely on the 1\% and 10\% randomly sampled streams of tweets that are provided by Twitter. In this paper, we analyze a different sampling methodology, one where content is gathered only from a relatively small sample ($< 1\%$) of the user population, namely, the expert users. Over the duration of a month, we gathered tweets from over 500,000 Twitter users who are identified as experts on a diverse set of topics, and compared the resulting expert sampled tweets with the 1\% randomly sampled tweets provided publicly by Twitter. We compared the sampled datasets along several dimensions, including the popularity, topical diversity, trustworthiness, and timeliness of the information contained within them, and on the sentiment/opinion expressed on specific topics. Our analysis reveals several important differences in data obtained through the different sampling methodologies, which have serious implications for applications such as topical search, trustworthy content recommendations, breaking news detection, and opinion mining.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2015:SWU, author = "Yazhe Wang and Jamie Callan and Baihua Zheng", title = "Should We Use the Sample? 
{Analyzing} Datasets Sampled from {Twitter}'s Stream {API}", journal = j-TWEB, volume = "9", number = "3", pages = "13:1--13:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746366", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Researchers have begun studying content obtained from microblogging services such as Twitter to address a variety of technological, social, and commercial research questions. The large number of Twitter users and even larger volume of tweets often make it impractical to collect and maintain a complete record of activity; therefore, most research and some commercial software applications rely on samples, often relatively small samples, of Twitter data. For the most part, sample sizes have been based on availability and practical considerations. Relatively little attention has been paid to how well these samples represent the underlying stream of Twitter data. To fill this gap, this article performs a comparative analysis on samples obtained from two of Twitter's streaming APIs with a more complete Twitter dataset to gain an in-depth understanding of the nature of Twitter data samples and their potential for use in various data mining tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Su:2015:RRT, author = "Zhiyuan Su and Ling Liu and Mingchu Li and Xinxin Fan and Yang Zhou", title = "Reliable and Resilient Trust Management in Distributed Service Provision Networks", journal = j-TWEB, volume = "9", number = "3", pages = "14:1--14:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2754934", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Distributed service networks are popular platforms for service providers to offer services to consumers and for service consumers to acquire services from unknown parties. eBay and Amazon are two well-known examples of enabling and hosting such service networks to connect service providers to service consumers. Trust management is a critical component for scaling such distributed service networks to a large and growing number of participants. In this article, we present ServiceTrust$^{++}$, a feedback quality--sensitive and attack resilient trust management scheme for empowering distributed service networks with effective trust management capability. Compared with existing trust models, ServiceTrust$^{++}$ has several novel features. First, we present six attack models to capture both independent and colluding attacks with malicious cliques, malicious spies, and malicious camouflages. Second, we aggregate the feedback ratings based on the variances of participants' feedback behaviors and incorporate feedback similarity as weight into the local trust algorithm. Third, we compute the global trust of a participant by employing conditional trust propagation based on the feedback similarity threshold. 
This allows ServiceTrust$^{++}$ to control and prevent malicious spies and malicious camouflage peers from boosting their global trust scores by manipulating the feedback ratings of good peers and by taking advantage of the uniform trust propagation. Finally, we systematically combine a trust-decaying strategy with a threshold value--based conditional trust propagation to further strengthen the robustness of our global trust computation against sophisticated malicious feedback. Experimental evaluation with both simulation-based networks and real network dataset Epinion show that ServiceTrust$^{++}$ is highly resilient against all six attack models and highly effective compared to EigenTrust, the most popular and representative trust propagation model to date.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Calzavara:2015:SLA, author = "Stefano Calzavara and Gabriele Tolomei and Andrea Casini and Michele Bugliesi and Salvatore Orlando", title = "A Supervised Learning Approach to Protect Client Authentication on the {Web}", journal = j-TWEB, volume = "9", number = "3", pages = "15:1--15:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2754933", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Browser-based defenses have recently been advocated as an effective mechanism to protect potentially insecure web applications against the threats of session hijacking, fixation, and related attacks. In existing approaches, all such defenses ultimately rely on client-side heuristics to automatically detect cookies containing session information, to then protect them against theft or otherwise unintended use. 
While clearly crucial to the effectiveness of the resulting defense mechanisms, these heuristics have not, as yet, undergone any rigorous assessment of their adequacy. In this article, we conduct the first such formal assessment, based on a ground truth of 2,464 cookies we collect from 215 popular websites of the Alexa ranking. To obtain the ground truth, we devise a semiautomatic procedure that draws on the novel notion of authentication token, which we introduce to capture multiple web authentication schemes. We test existing browser-based defenses in the literature against our ground truth, unveiling several pitfalls both in the heuristics adopted and in the methods used to assess them. We then propose a new detection method based on supervised learning, where our ground truth is used to train a set of binary classifiers, and report on experimental evidence that our method outperforms existing proposals. Interestingly, the resulting classifiers, together with our hands-on experience in the construction of the ground truth, provide new insight on how web authentication is actually implemented in practice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Lee:2015:DPM, author = "Sihyung Lee", title = "Detection of Political Manipulation in Online Communities through Measures of Effort and Collaboration", journal = j-TWEB, volume = "9", number = "3", pages = "16:1--16:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767134", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Aug 7 10:27:41 MDT 2015", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online social media allow users to interact with one another by sharing opinions, and these opinions have a critical impact on the way readers think and behave. Accordingly, an increasing number of {\em manipulators} deliberately spread messages to influence the public, often in an organized manner. In particular, political manipulation --- manipulation of opponents to win political advantage --- can result in serious consequences: antigovernment riots can break out, leading to candidates' defeat in an election. A few approaches have been proposed to detect such manipulation based on the level of social interaction (i.e., manipulators actively post opinions but infrequently befriend and reply to other users). However, several studies have shown that the interactions can be forged at a low cost and thus may not be effective measures of manipulation. To go one step further, we collect a dataset for real, large-scale political manipulation, which consists of opinions found on Internet forums. These opinions are divided into manipulators and nonmanipulators. Using this collection, we demonstrate that manipulators inevitably work hard, in teams, to quickly influence a large audience. With this in mind, it could be said that a high level of collaborative efforts strongly indicates manipulation.
For example, a group of manipulators may jointly post numerous opinions with a consistent theme and selectively recommend the same, well-organized opinion to promote its rank. We show that the effort measures, when combined with a supervised learning algorithm, successfully identify greater than 95\% of the manipulators. We believe that the proposed method will help system administrators to accurately detect manipulators in disguise, significantly decreasing the intensity of manipulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gollapalli:2015:IRH, author = "Sujatha Das Gollapalli and Cornelia Caragea and Prasenjit Mitra and C. Lee Giles", title = "Improving Researcher Homepage Classification with Unlabeled Data", journal = j-TWEB, volume = "9", number = "4", pages = "17:1--17:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767135", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/hash.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A classifier that determines if a webpage is relevant to a specified set of topics comprises a key component for focused crawling. Can a classifier that is tuned to perform well on training datasets continue to filter out irrelevant pages in the face of changing content on the Web? We investigate this question in the context of identifying researcher homepages. We show experimentally that classifiers trained on existing datasets of academic homepages underperform on ``non-homepages'' present on current-day academic websites. 
As an alternative to obtaining labeled datasets to retrain classifiers for the new content, in this article we ask the following question: ``How can we effectively use the unlabeled data readily available from academic websites to improve researcher homepage classification?'' We design novel URL-based features and use them in conjunction with content-based features for representing homepages. Within the co-training framework, these sets of features can be treated as complementary views enabling us to effectively use unlabeled data and obtain remarkable improvements in homepage identification on the current-day academic websites. We also propose a novel technique for ``learning a conforming pair of classifiers'' that mimics co-training. Our algorithm seeks to minimize a loss (objective) function quantifying the difference in predictions from the two views afforded by co-training. We argue that this loss formulation provides insights for understanding co-training and can be used even in the absence of a validation dataset. Our next set of findings pertains to the evaluation of other state-of-the-art techniques for classifying homepages. First, we apply feature selection (FS) and feature hashing (FH) techniques independently and in conjunction with co-training to academic homepages. FS is a well-known technique for removing redundant and unnecessary features from the data representation, whereas FH is a technique that uses hash functions for efficient encoding of features. We show that FS can be effectively combined with co-training to obtain further improvements in identifying homepages. However, using hashed feature representations, a performance degradation is observed possibly due to feature collisions. Finally, we evaluate other semisupervised algorithms for homepage classification. 
We show that although several algorithms are effective in using information from the unlabeled instances, co-training that explicitly harnesses the feature split in the underlying instances outperforms approaches that combine content and URL features into a single view.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2015:DCU, author = "Jing Wang and Clement T. Yu and Philip S. Yu and Bing Liu and Weiyi Meng", title = "Diversionary Comments under Blog Posts", journal = j-TWEB, volume = "9", number = "4", pages = "18:1--18:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2789211", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "There has been a recent swell of interest in the analysis of blog comments. However, much of the work focuses on detecting comment spam in the blogsphere. An important issue that has been neglected so far is the identification of diversionary comments. Diversionary comments are defined as comments that divert the topic from the original post. A possible purpose is to distract readers from the original topic and draw attention to a new topic. We categorize diversionary comments into five types based on our observations and propose an effective framework to identify and flag them. To the best of our knowledge, the problem of detecting diversionary comments has not been studied so far. We solve the problem in two different ways: (i) rank all comments in descending order of being diversionary and (ii) consider it as a classification problem. 
Our evaluation on 4,179 comments under 40 different blog posts from Digg and Reddit shows that the proposed method achieves the high mean average precision of 91.9\% when the problem is considered as a ranking problem and 84.9\% of F-measure as a classification problem. Sensitivity analysis indicates that the effectiveness of the method is stable under different parameter settings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Katzir:2015:ECC, author = "Liran Katzir and Stephen J. Hardiman", title = "Estimating Clustering Coefficients and Size of Social Networks via Random Walk", journal = j-TWEB, volume = "9", number = "4", pages = "19:1--19:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2790304", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This work addresses the problem of estimating social network measures. Specifically, the measures at hand are the network average and global clustering coefficients and the number of registered users. The algorithms at hand (1) assume no prior knowledge about the network and (2) access the network using only the publicly available interface. More precisely, this work provides (a) a unified approach for clustering coefficients estimation and (b) a new network size estimator. The unified approach for the clustering coefficients yields the first external access algorithm for estimating the global clustering coefficient. The new network size estimator offers improved accuracy compared to prior art estimators. Our approach is to view a social network as an undirected graph and use the public interface to retrieve a random walk. 
To estimate the clustering coefficient, the connectivity of each node in the random walk sequence is tested in turn. We show that the error drops exponentially in the number of random walk steps. For the network size estimation we offer a generalized view of prior art estimators that in turn yields an improved estimator. All algorithms are validated on several publicly available social network datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Watanabe:2015:FQM, author = "Willian Massami Watanabe and Ana Luiza Dias and Renata Pontin {De Mattos Fortes}", title = "{Fona}: Quantitative Metric to Measure Focus Navigation on Rich {Internet} Applications", journal = j-TWEB, volume = "9", number = "4", pages = "20:1--20:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2812812", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 25 07:43:09 MST 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Web 2.0 brought new requirements to the architecture of web systems. Web applications' interfaces are becoming more and more interactive. However, these changes are severely impacting how disabled users interact through assistive technologies with the web. In order to deploy an accessible web application, developers can use WAI-ARIA to design an accessible web application, which manually implements focus and keyboard navigation mechanisms. This article presents a quantitative metric, named Fona, which measures how the Focus Navigation WAI-ARIA requirement has been implemented on the web. Fona counts JavaScript mouse event listeners, HTML elements with role attributes, and TabIndex attributes in the DOM structure of webpages. 
Fona's evaluation approach provides a narrow analysis of one single accessibility requirement. But it enables monitoring this accessibility requirement in a large number of webpages. This monitoring activity might be used to give insights about how Focus Navigation and ARIA requirements have been considered by web development teams. Fona is validated comparing the results of a set of WAI-ARIA conformant implementations and a set of webpages formed by Alexa's 349 top most popular websites. The analysis of Fona's value for Alexa's websites highlights that many websites still lack the implementation of Focus Navigation through their JavaScript interactive content.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Marszalkowski:2016:ASC, author = "Jakub Marszalkowski and Jan Mizgajski and Dariusz Mokwa and Maciej Drozdowski", title = "Analysis and Solution of {CSS}-Sprite Packing Problem", journal = j-TWEB, volume = "10", number = "1", pages = "1:1--1:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818377", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A CSS-sprite packing problem is considered in this article. CSS-sprite is a technique of combining many pictures of a web page into one image for the purpose of reducing network transfer time. The CSS-sprite packing problem is formulated here as an optimization challenge. The significance of geometric packing, image compression and communication performance is discussed. A mathematical model for constructing multiple sprites and optimization of load time is proposed. The impact of PNG-sprite aspect ratio on file size is studied experimentally. 
Benchmarking of real user web browsers communication performance covers latency, bandwidth, number of concurrent channels as well as speedup from parallel download. Existing software for building CSS-sprites is reviewed. A novel method, called Spritepack, is proposed and evaluated. Spritepack outperforms current software.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Amor:2016:DBT, author = "Iheb Ben Amor and Salima Benbernou and Mourad Ouziri and Zaki Malik and Brahim Medjahed", title = "Discovering Best Teams for Data Leak-Aware Crowdsourcing in Social Networks", journal = j-TWEB, volume = "10", number = "1", pages = "2:1--2:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2814573", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Crowdsourcing is emerging as a powerful paradigm to help perform a wide range of tedious tasks in various enterprise applications. As such applications become more complex, crowdsourcing systems often require the collaboration of several experts connected through professional/social networks and organized in various teams. For instance, a well-known car manufacturer asked fans to contribute ideas for the kinds of technologies that should be incorporated into one of its cars. For that purpose, fans needed to collaborate and form teams competing with each others to come up with the best ideas. However, once teams are formed, each one would like to provide the best solution and treat that solution as a ``trade secret,'' hence preventing any data leak to its competitors (i.e., the other teams). In this article, we propose a data leak--aware crowdsourcing system called SocialCrowd. 
We introduce a clustering algorithm that uses social relationships between crowd workers to discover all possible teams while avoiding interteam data leakage. We also define a ranking mechanism to select the ``best'' team configurations. Our mechanism is based on the semiring approach defined in the area of soft constraints programming. Finally, we present experiments to assess the efficiency of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Song:2016:IJV, author = "Hengjie Song and Yonghui Xu and Huaqing Min and Qingyao Wu and Wei Wei and Jianshu Weng and Xiaogang Han and Qiang Yang and Jialiang Shi and Jiaqian Gu and Chunyan Miao and Nishida Toyoaki", title = "Individual Judgments Versus Consensus: Estimating Query-{URL} Relevance", journal = j-TWEB, volume = "10", number = "1", pages = "3:1--3:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2834122", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Query-URL relevance, measuring the relevance of each retrieved URL with respect to a given query, is one of the fundamental criteria to evaluate the performance of commercial search engines. The traditional way to collect reliable and accurate query-URL relevance requires multiple annotators to provide their individual judgments based on their subjective expertise (e.g., understanding of user intents). In this case, the annotators' subjectivity reflected in each annotator individual judgment (AIJ) inevitably affects the quality of the ground truth relevance (GTR). 
But to the best of our knowledge, the potential impact of AIJs on estimating GTRs has not been studied and exploited quantitatively by existing work. This article first studies how multiple AIJs and GTRs are correlated. Our empirical studies find that the multiple AIJs possibly provide more cues to improve the accuracy of estimating GTRs. Inspired by this finding, we then propose a novel approach to integrating the multiple AIJs with the features characterizing query-URL pairs for estimating GTRs more accurately. Furthermore, we conduct experiments in a commercial search engine --- Baidu.com --- and report significant gains in terms of the normalized discounted cumulative gains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhang:2016:DSP, author = "Xianchao Zhang and Zhaoxing Li and Shaoping Zhu and Wenxin Liang", title = "Detecting Spam and Promoting Campaigns in {Twitter}", journal = j-TWEB, volume = "10", number = "1", pages = "4:1--4:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846102", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Twitter has become a target platform for both promoters and spammers to disseminate their messages, which are more harmful than traditional spamming methods, such as email spamming. Recently, large amounts of campaigns that contain lots of spam or promotion accounts have emerged in Twitter. The campaigns cooperatively post unwanted information, and thus they can infect more normal users than individual spam or promotion accounts. Organizing or participating in campaigns has become the main technique to spread spam or promotion information in Twitter.
Since traditional solutions focus on checking individual accounts or messages, efficient techniques for detecting spam and promotion campaigns in Twitter are urgently needed. In this article, we propose a framework to detect both spam and promotion campaigns. Our framework consists of three steps: the first step links accounts who post URLs for similar purposes; the second step extracts candidate campaigns that may be for spam or promotion purposes; and the third step classifies the candidate campaigns into normal, spam, and promotion groups. The key point of the framework is how to measure the similarity between accounts' purposes of posting URLs. We present two measure methods based on Shannon information theory: the first one uses the URLs posted by the users, and the second one considers both URLs and timestamps. Experimental results demonstrate that the proposed methods can extract the majority of the candidate campaigns correctly, and detect promotion and spam campaigns with high precision and recall.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Eshuis:2016:FCE, author = "Rik Eshuis and Freddy L{\'e}cu{\'e} and Nikolay Mehandjiev", title = "Flexible Construction of Executable Service Compositions from Reusable Semantic Knowledge", journal = j-TWEB, volume = "10", number = "1", pages = "5:1--5:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2842628", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Most service composition approaches rely on top-down decomposition of a problem and AI-style planning to assemble service components into a meaningful whole, impeding reuse and flexibility. 
In this article, we propose an approach that starts from declarative knowledge about the semantics of individual service components and algorithmically constructs a full-blown service orchestration process that supports sequence, choice, and parallelism. The output of our algorithm can be mapped directly into a number of service orchestration languages such as OWL-S and BPEL. The approach consists of two steps. First, semantic links specifying data dependencies among the services are derived and organized in a flexible network. Second, based on a user request indicating the desired outcomes from the composition, an executable composition is constructed from the network that satisfies the dependencies. The approach is unique in producing complex compositions out of semantic links between services in a flexible way. It also allows reusing knowledge about semantic dependencies in the network to generate new compositions through new requests and modification of services at runtime. The approach has been implemented in a prototype that outperforms related composition prototypes in experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Avila:2016:WTC, author = "Bruno T. {\'A}vila and Rafael D. 
Lins", title = "{W-tree}: a Compact External Memory Representation for Webgraphs", journal = j-TWEB, volume = "10", number = "1", pages = "6:1--6:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2835181", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "World Wide Web applications need to use, constantly update, and maintain large webgraphs for executing several tasks, such as calculating the web impact factor, finding hubs and authorities, performing link analysis by webometrics tools, and ranking webpages by web search engines. Such webgraphs need to use a large amount of main memory, and, frequently, they do not completely fit in, even if compressed. Therefore, applications require the use of external memory. This article presents a new compact representation for webgraphs, called w-tree, which is designed specifically for external memory. It supports the execution of basic queries (e.g., full read, random read, and batch random read), set-oriented queries (e.g., superset, subset, equality, overlap, range, inlink, and co-inlink), and some advanced queries, such as edge reciprocal and hub and authority. Furthermore, a new layout tree designed specifically for webgraphs is also proposed, reducing the overall storage cost and allowing the random read query to be performed with an asymptotically faster runtime in the worst case. To validate the advantages of the w-tree, a series of experiments are performed to assess an implementation of the w-tree comparing it to a compact main memory representation. The results obtained show that w-tree is competitive in compression time and rate and in query time, which may execute several orders of magnitude faster for set-oriented queries than its competitors.
The results provide empirical evidence that it is feasible to use a compact external memory representation for webgraphs in real applications, contradicting the previous assumptions made by several researchers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2016:STQ, author = "Xinyu Wang and Jianke Zhu and Zibin Zheng and Wenjie Song and Yuanhong Shen and Michael R. Lyu", title = "A Spatial-Temporal {QoS} Prediction Approach for Time-aware {Web} Service Recommendation", journal = j-TWEB, volume = "10", number = "1", pages = "7:1--7:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801164", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Due to the popularity of service-oriented architectures for various distributed systems, an increasing number of Web services have been deployed all over the world. Recently, Web service recommendation became a hot research topic, one that aims to accurately predict the quality of functional satisfactory services for each end user. Generally, the performance of Web service changes over time due to variations of service status and network conditions. Instead of employing the conventional temporal models, we propose a novel spatial-temporal QoS prediction approach for time-aware Web service recommendation, where a sparse representation is employed to model QoS variations. Specifically, we make a zero-mean Laplace prior distribution assumption on the residuals of the QoS prediction, which corresponds to a Lasso regression problem. 
To effectively select the nearest neighbor for the sparse representation of temporal QoS values, the geo-location of web service is employed to reduce searching range while improving prediction accuracy. The extensive experimental results demonstrate that the proposed approach outperforms state-of-art methods with more than 10\% improvement on the accuracy of temporal QoS prediction for time-aware Web service recommendation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Guo:2016:NEB, author = "Guibing Guo and Jie Zhang and Neil Yorke-Smith", title = "A Novel Evidence-Based {Bayesian} Similarity Measure for Recommender Systems", journal = j-TWEB, volume = "10", number = "2", pages = "8:1--8:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856037", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "User-based collaborative filtering, a widely used nearest neighbour-based recommendation technique, predicts an item's rating by aggregating its ratings from similar users. User similarity is traditionally calculated by cosine similarity or the Pearson correlation coefficient. However, both of these measures consider only the direction of rating vectors, and suffer from a range of drawbacks. To overcome these issues, we propose a novel Bayesian similarity measure based on the Dirichlet distribution, taking into consideration both the direction and length of rating vectors. We posit that not all the rating pairs should be equally counted in order to accurately model user correlation. Three different evidence factors are designed to compute the weights of rating pairs. 
Further, our principled method reduces correlation due to chance and potential system bias. Experimental results on six real-world datasets show that our method achieves superior accuracy in comparison with counterparts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Frattolillo:2016:BFM, author = "Franco Frattolillo", title = "A Buyer-Friendly and Mediated Watermarking Protocol for {Web} Context", journal = j-TWEB, volume = "10", number = "2", pages = "9:1--9:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856036", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Watermarking protocols are used in conjunction with digital watermarking techniques to protect digital copyright on the Internet. They define the schemes of the web transactions by which buyers can purchase protected digital content distributed by content providers in a secure manner. Over the last few years, significant examples of watermarking protocols have been proposed in literature. However, a detailed examination of such protocols has revealed a number of problems that have to be addressed in order to make them suited for current web context. Therefore, based on the most relevant problems derived from literature, this article identifies the main challenges posed by the development of watermarking protocols for web context and presents a watermarking protocol that follows a new secure, buyer-centric and mediated design approach able to meet such challenges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wu:2016:QDQ, author = "Wensheng Wu and Weiyi Meng and Weifeng Su and Guangyou Zhou and Yao-Yi Chiang", title = "{Q2P}: Discovering Query Templates via Autocompletion", journal = j-TWEB, volume = "10", number = "2", pages = "10:1--10:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873061", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We present Q2P, a system that discovers query templates from search engines via their query autocompletion services. Q2P is distinct from the existing works in that it does not rely on query logs of search engines that are typically not readily available. Q2P is also unique in that it uses a trie to economically store queries sampled from a search engine and employs a beam-search strategy that focuses the expansion of the trie on its most promising nodes. Furthermore, Q2P leverages the trie-based storage of query sample to discover query templates using only two passes over the trie. Q2P is a key part of our ongoing project Deep2Q on a template-driven data integration on the Deep Web, where the templates learned by Q2P are used to guide the integration process in Deep2Q. Experimental results on four major search engines indicate that (1) Q2P sends only a moderate number of queries (ranging from 597 to 1,135) to the engines, while obtaining a significant number of completions per query (ranging from 4.2 to 8.5 on the average); (2) a significant number of templates (ranging from 8 to 32 when the minimum support for frequent templates is set to 1\%) may be discovered from the samples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Walk:2016:ADC, author = "Simon Walk and Denis Helic and Florian Geigl and Markus Strohmaier", title = "Activity Dynamics in Collaboration Networks", journal = j-TWEB, volume = "10", number = "2", pages = "11:1--11:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873060", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Many online collaboration networks struggle to gain user activity and become self-sustaining due to the ramp-up problem or dwindling activity within the system. Prominent examples include online encyclopedias such as (Semantic) MediaWikis, Question and Answering portals such as StackOverflow, and many others. Only a small fraction of these systems manage to reach self-sustaining activity, a level of activity that prevents the system from reverting to a nonactive state. In this article, we model and analyze activity dynamics in synthetic and empirical collaboration networks. Our approach is based on two opposing and well-studied principles: (i) without incentives, users tend to lose interest to contribute and thus, systems become inactive, and (ii) people are susceptible to actions taken by their peers (social or peer influence). With the activity dynamics model that we introduce in this article we can represent typical situations of such collaboration networks. For example, activity in a collaborative network, without external impulses or investments, will vanish over time, eventually rendering the system inactive. However, by appropriately manipulating the activity dynamics and/or the underlying collaboration networks, we can jump-start a previously inactive system and advance it toward an active state. 
To be able to do so, we first describe our model and its underlying mechanisms. We then provide illustrative examples of empirical datasets and characterize the barrier that has to be breached by a system before it can become self-sustaining in terms of critical mass and activity dynamics. Additionally, we expand on this empirical illustration and introduce a new metric $p$ --- the Activity Momentum --- to assess the activity robustness of collaboration networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zheng:2016:PQA, author = "Huiyuan Zheng and Jian Yang and Weiliang Zhao", title = "Probabilistic {QoS} Aggregations for Service Composition", journal = j-TWEB, volume = "10", number = "2", pages = "12:1--12:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2876513", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In this article, we propose a comprehensive approach for Quality of Service (QoS) calculation in service composition. Differing from the existing work on QoS aggregations that represent QoS as single values, discrete values with frequencies, or standard statistical distributions, the proposed approach has the capability to handle any type of QoS probability distribution. A set of formulae and algorithms are developed to calculate the QoS of a composite service according to four identified basic patterns as sequential, parallel, conditional, and loop. We demonstrate that the proposed QoS calculation method is much more efficient than existing simulation methods. It has a high scalability and builds a solid foundation for real-time QoS analysis and prediction in service composition. 
Experiment results are provided to show the effectiveness and efficiency of the proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Paul:2016:SBC, author = "Michael J. Paul and Ryen W. White and Eric Horvitz", title = "Search and Breast Cancer: On Episodic Shifts of Attention over Life Histories of an Illness", journal = j-TWEB, volume = "10", number = "2", pages = "13:1--13:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2893481", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We seek to understand the evolving needs of people who are faced with a life-changing medical diagnosis based on analyses of queries extracted from an anonymized search query log. Focusing on breast cancer, we manually tag a set of Web searchers as showing patterns of search behavior consistent with someone grappling with the screening, diagnosis, and treatment of breast cancer. We build and apply probabilistic classifiers to detect these searchers from multiple sessions and to identify the timing of diagnosis using temporal and statistical features. We explore the changes in information seeking over time before and after an inferred diagnosis of breast cancer by aligning multiple searchers by the estimated time of diagnosis. We employ the classifier to automatically identify 1,700 candidate searchers with an estimated 90\% precision, and we predict the day of diagnosis within 15 days with an 88\% accuracy. We show that the geographic and demographic attributes of searchers identified with high probability are strongly correlated with ground truth of reported incidence rates. 
We then analyze the content of queries over time for inferred cancer patients, using a detailed ontology of cancer-related search terms. The analysis reveals the rich temporal structure of the evolving queries of people likely diagnosed with breast cancer. Finally, we focus on subtypes of illness based on inferred stages of cancer and show clinically relevant dynamics of information seeking based on the dominant stage expressed by searchers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Doerfel:2016:WUA, author = "Stephan Doerfel and Daniel Zoller and Philipp Singer and Thomas Niebler and Andreas Hotho and Markus Strohmaier", title = "What Users Actually Do in a Social Tagging System: a Study of User Behavior in {BibSonomy}", journal = j-TWEB, volume = "10", number = "2", pages = "14:1--14:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2896821", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed May 25 17:02:04 MDT 2016", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social tagging systems have established themselves as an important part in today's Web and have attracted the interest of our research community in a variety of investigations. Henceforth, several aspects of social tagging systems have been discussed and assumptions have emerged on which our community builds their work. Yet, testing such assumptions has been difficult due to the absence of suitable usage data in the past. 
In this work, we thoroughly investigate and evaluate four aspects about tagging systems, covering social interaction, retrieval of posted resources, the importance of the three different types of entities, users, resources, and tags, as well as connections between these entities' popularity in posted and in requested content. For that purpose, we examine live server log data gathered from the real-world, public social tagging system BibSonomy. Our empirical results paint a mixed picture about the four aspects. Although typical assumptions hold to a certain extent for some, other aspects need to be reflected in a very critical light. Our observations have implications for the understanding of social tagging systems and the way they are used on the Web. We make the dataset used in this work available to other researchers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Naini:2016:SEW, author = "Kaweh Djafari Naini and Ismail Sengor Altingovde and Wolf Siberski", title = "Scalable and Efficient {Web} Search Result Diversification", journal = j-TWEB, volume = "10", number = "3", pages = "15:1--15:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2907948", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It has been shown that top-$k$ retrieval quality can be considerably improved by taking not only relevance but also diversity into account. However, currently proposed diversification approaches have not put much attention on practical usability in large-scale settings, such as modern web search systems. In this work, we make two contributions toward this goal. 
First, we propose a combination of optimizations and heuristics for an implicit diversification algorithm based on the desirable facility placement principle, and present two algorithms that achieve linear complexity without compromising the retrieval effectiveness. Instead of an exhaustive comparison of documents, these algorithms first perform a clustering phase and then exploit its outcome to compose the diverse result set. Second, we describe and analyze two variants for distributed diversification in a computing cluster, for large-scale IR where the document collection is too large to keep in one node. Our contribution in this direction is pioneering, as there exists no earlier work in the literature that investigates the effectiveness and efficiency of diversification on a distributed setup. Extensive evaluations on a standard TREC framework demonstrate a competitive retrieval quality of the proposed optimizations to the baseline algorithm while reducing the processing time by more than 80\% and up to 97\%, and shed light on the efficiency and effectiveness tradeoffs of diversification when applied on top of a distributed architecture.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Furche:2016:PFW, author = "Tim Furche and Giovanni Grasso and Michael Huemer and Christian Schallhart and Michael Schrefl", title = "{PeaCE-Ful} {Web} Event Extraction and Processing as Bitemporal Mutable Events", journal = j-TWEB, volume = "10", number = "3", pages = "16:1--16:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2911989", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The web is the largest bulletin board of the world. 
Events of all types, from flight arrivals to business meetings, are announced on this board. Tracking and reacting to such event announcements, however, is a tedious manual task, only slightly alleviated by email or similar notifications. Announcements are published with human readers in mind, and updates or delayed announcements are frequent. These characteristics have hampered attempts at automatic tracking. PeaCE provides the first integrated framework for event processing on top of web event ads, consisting of event extraction, complex event processing, and action execution in response to these events. Given a schema of the events to be tracked, the framework populates this schema by extracting events from announcement sources. This extraction is performed by little programs called wrappers that produce the events including updates and retractions. PeaCE then queries these events to detect complex events, often combining announcements from multiple sources. To deal with updates and delayed announcements, PeaCE's schemas are bitemporal, to distinguish between occurrence and detection time. This allows complex event specifications to track updates and to react upon differences in occurrence and detection time. In case of new, changing, or deleted events, PeaCE allows one to execute actions, such as tweeting or sending out email notifications. Actions are typically specified as web interactions, for example, to fill and submit a form with attributes of the triggering event. Our evaluation shows that PeaCE's processing is dominated by the time needed for accessing the web to extract events and perform actions, allotting to 97.4\%. Thus, PeaCE requires only 2.6\% overhead, and therefore, the complex event processor scales well even with moderate resources. 
We further show that simple and reasonable restrictions on complex event specifications and the timing of constituent events suffice to guarantee that PeaCE only requires a constant buffer to process arbitrarily many event announcements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cranor:2016:LSE, author = "Lorrie Faith Cranor and Pedro Giovanni Leon and Blase Ur", title = "A Large-Scale Evaluation of {U.S.} Financial Institutions' Standardized Privacy Notices", journal = j-TWEB, volume = "10", number = "3", pages = "17:1--17:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2911988", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Financial institutions in the United States are required by the Gramm-Leach-Bliley Act to provide annual privacy notices. In 2009, eight federal agencies jointly released a model privacy form for these disclosures. While the use of this model privacy form is not required, it has been widely adopted. We automatically evaluated 6,191 U.S. financial institutions' privacy notices posted on the World Wide Web. We found large variance in stated practices, even among institutions of the same type. While thousands of financial institutions share personal information without providing the opportunity for consumers to opt out, some institutions' practices are more privacy protective. Regression analyses show that large institutions and those headquartered in the northeastern region share consumers' personal information at higher rates than all other institutions. 
Furthermore, our analysis helped us uncover institutions that do not let consumers limit data sharing when legally required to do so, as well as institutions making self-contradictory statements. We discuss implications for privacy in the financial industry, issues with the design and use of the model privacy form on the World Wide Web, and future directions for standardized privacy notice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Srba:2016:CSC, author = "Ivan Srba and Maria Bielikova", title = "A Comprehensive Survey and Classification of Approaches for Community Question Answering", journal = j-TWEB, volume = "10", number = "3", pages = "18:1--18:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934687", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Community question-answering (CQA) systems, such as Yahoo! Answers or Stack Overflow, belong to a prominent group of successful and popular Web 2.0 applications, which are used every day by millions of users to find an answer on complex, subjective, or context-dependent questions. In order to obtain answers effectively, CQA systems should optimally harness collective intelligence of the whole online community, which will be impossible without appropriate collaboration support provided by information technologies. Therefore, CQA became an interesting and promising subject of research in computer science and now we can gather the results of 10 years of research. Nevertheless, in spite of the increasing number of publications emerging each year, so far the research on CQA systems has missed a comprehensive state-of-the-art survey. 
We attempt to fill this gap by a review of 265 articles published between 2005 and 2014, which were selected from major conferences and journals. According to this evaluation, at first we propose a framework that defines descriptive attributes of CQA approaches. Second, we introduce a classification of all approaches with respect to problems they are aimed to solve. The classification is consequently employed in a review of a significant number of representative approaches, which are described by means of attributes from the descriptive framework. As a part of the survey, we also depict the current trends as well as highlight the areas that require further attention from the research community.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hwang:2016:PPS, author = "Seung-Won Hwang and Saehoon Kim and Yuxiong He and Sameh Elnikety and Seungjin Choi", title = "Prediction and Predictability for Search Query Acceleration", journal = j-TWEB, volume = "10", number = "3", pages = "19:1--19:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2943784", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:09 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A commercial web search engine shards its index among many servers, and therefore the response time of a search query is dominated by the slowest server that processes the query. Prior approaches target improving responsiveness by reducing the tail latency, or high-percentile response time, of an individual search server. They predict query execution time, and if a query is predicted to be long-running, it runs in parallel; otherwise, it runs sequentially. 
These approaches are, however, not accurate enough for reducing a high tail latency when responses are aggregated from many servers because this requires each server to reduce a substantially higher tail latency (e.g., the 99.99th percentile), which we call extreme tail latency. To address tighter requirements of extreme tail latency, we propose a new design space for the problem, subsuming existing work and also proposing a new solution space. Existing work makes a prediction using features available at indexing time and focuses on optimizing prediction features for accelerating tail queries. In contrast, we identify ``when to predict?'' as another key optimization question. This opens up a new solution of delaying a prediction by a short duration to allow many short-running queries to complete without parallelization and, at the same time, to allow the predictor to collect a set of dynamic features using runtime information. This new question expands a solution space in two meaningful ways. First, we see a significant reduction of tail latency by leveraging ``dynamic'' features collected at runtime that estimate query execution time with higher accuracy. Second, we can ask whether to override prediction when the ``predictability'' is low. We show that considering predictability accelerates the query by achieving a higher recall. With this prediction, we propose to accelerate the queries that are predicted to be long-running. In our preliminary work, we focused on parallelization as an acceleration scenario. We extend to consider heterogeneous multicore hardware for acceleration. This hardware combines processor cores with different microarchitectures such as energy-efficient little cores and high-performance big cores, and accelerating web search using this hardware has remained an open problem. We evaluate the proposed prediction framework in two scenarios: (1) query parallelization on a multicore processor and (2) query scheduling on a heterogeneous processor. 
Our extensive evaluation results show that, for both scenarios of query acceleration using parallelization and heterogeneous cores, the proposed framework is effective in reducing the extreme tail latency compared to a state-of-the-art predictor because of its higher recall, and it improves server throughput by more than 70\% because of its improved precision.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Eraslan:2016:STA, author = "Sukru Eraslan and Yeliz Yesilada and Simon Harper", title = "Scanpath Trend Analysis on {Web} Pages: Clustering Eye Tracking Scanpaths", journal = j-TWEB, volume = "10", number = "4", pages = "20:1--20:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2970818", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Eye tracking studies have widely been used in improving the design and usability of web pages and in the research of understanding how users navigate them. However, there is limited research in clustering users' eye movement sequences (i.e., scanpaths) on web pages to identify a general direction they follow. Existing research tends to be reductionist, which means that the resulting path is so short that it is not useful. Moreover, there is little work on correlating users' scanpaths with visual elements of web pages and the underlying source code, which means the result cannot be used for further processing. In order to address these limitations, we introduce a new concept in clustering scanpaths called Scanpath Trend Analysis (STA) that not only considers the visual elements visited by all users, but also considers the visual elements visited by the majority in any order. 
We present an algorithm which automatically does this trend analysis to identify a trending scanpath for multiple web users in terms of visual elements of a web page. In contrast to existing research, the STA algorithm first analyzes the most visited visual elements in given scanpaths, clusters the scanpaths by arranging these visual elements based on their overall positions in the individual scanpaths, and then constructs a trending scanpath in terms of these visual elements. This algorithm was experimentally evaluated by an eye tracking study on six web pages for two different kinds of tasks (12 cases in total). Our experimental results show that the STA algorithm generates a trending scanpath that addresses the reductionist problem of existing work by preventing the loss of commonly visited visual elements for all cases. Based on the statistical tests, the STA algorithm also generates a trending scanpath that is significantly more similar to the inputted scanpaths compared to other existing work in 10 out of 12 cases. In the remaining cases, the STA algorithm still performs significantly better than some other existing work. This algorithm contributes to behavior analysis research on the web that can be used for different purposes: for example, re-engineering web pages guided by the trending scanpath to improve users' experience or guiding designers to improve their design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rafalak:2016:WCC, author = "Maria Rafalak and Dominik Deja and Adam Wierzbicki and Radoslaw Nielek and Michal Kakol", title = "{Web} Content Classification Using Distributions of Subjective Quality Evaluations", journal = j-TWEB, volume = "10", number = "4", pages = "21:1--21:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2994132", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Machine learning algorithms and recommender systems trained on human ratings are widely in use today. However, human ratings may be associated with a high level of uncertainty and are subjective, influenced by demographic or psychological factors. We propose a new approach to the design of object classes from human ratings: the use of entire distributions to construct classes. By avoiding aggregation for class definition, our approach loses no information and can deal with highly volatile or conflicting ratings. The approach is based on the concept of the Earth Mover's Distance (EMD), a measure of distance for distributions. We evaluate the proposed approach based on four datasets obtained from diverse Web content or movie quality evaluation services or experiments. We show that clusters discovered in these datasets using the EMD measure are characterized by a consistent and simple interpretation. Quality classes defined using entire rating distributions can be fitted to clusters of distributions in the four datasets using two parameters, resulting in a good overall fit. We also consider the impact of the composition of small samples on the distributions that are the basis of our classification approach. 
We show that using distributions based on small samples of 10 evaluations is still robust to several demographic and psychological variables. This observation suggests that the proposed approach can be used in practice for quality evaluation, even for highly uncertain and subjective ratings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Guo:2016:FEE, author = "Guangming Guo and Feida Zhu and Enhong Chen and Qi Liu and Le Wu and Chu Guan", title = "From Footprint to Evidence: an Exploratory Study of Mining Social Data for Credit Scoring", journal = j-TWEB, volume = "10", number = "4", pages = "22:1--22:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2996465", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the booming popularity of online social networks like Twitter and Weibo, online user footprints are accumulating rapidly on the social web. Simultaneously, the question of how to leverage the large-scale user-generated social media data for personal credit scoring comes into the sight of both researchers and practitioners. It has also become a topic of great importance and growing interest in the P2P lending industry. However, compared with traditional financial data, heterogeneous social data presents both opportunities and challenges for personal credit scoring. In this article, we seek a deep understanding of how to learn users' credit labels from social data in a comprehensive and efficient way. Particularly, we explore the social-data-based credit scoring problem under the micro-blogging setting for its open, simple, and real-time nature. 
To identify credit-related evidence hidden in social data, we choose to conduct an analytical and empirical study on a large-scale dataset from Weibo, the largest and most popular tweet-style website in China. Summarizing results from existing credit scoring literature, we first propose three social-data-based credit scoring principles as guidelines for in-depth exploration. In addition, we glean six credit-related insights arising from empirical observations of the testbed dataset. Based on the proposed principles and insights, we extract prediction features mainly from three categories of users' social data, including demographics, tweets, and networks. To harness this broad range of features, we put forward a two-tier stacking and boosting enhanced ensemble learning framework. Quantitative investigation of the extracted features shows that online social media data does have good potential in discriminating good credit users from bad. Furthermore, we perform experiments on the real-world Weibo dataset consisting of more than 7.3 million tweets and 200,000 users whose credit labels are known through our third-party partner. Experimental results show that (i) our approach achieves a roughly 0.625 AUC value with all the proposed social features as input, and (ii) our learning algorithm can outperform traditional credit scoring methods by as much as 17\% for social-data-based personal credit scoring.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bahri:2016:CCO, author = "Leila Bahri and Barbara Carminati and Elena Ferrari", title = "{COIP}-Continuous, Operable, Impartial, and Privacy-Aware Identity Validity Estimation for {OSN} Profiles", journal = j-TWEB, volume = "10", number = "4", pages = "23:1--23:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3014338", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Identity validation of Online Social Networks' (OSNs') peers is a critical concern to the insurance of safe and secure online socializing environments. Starting from the vision of empowering users to determine the validity of OSN identities, we suggest a framework to estimate the trustworthiness of online social profiles based only on the information they contain. Our framework is based on learning identity correlations between profile attributes in an OSN community and on collecting ratings from OSN community members to evaluate the trustworthiness of target profiles. Our system guarantees utility, user anonymity, impartiality in rating, and operability within the dynamics and continuous evolution of OSNs. In this article, we detail the system design, and we prove its correctness against these claimed quality properties. Moreover, we test its effectiveness, feasibility, and efficiency through experimentation on real-world datasets from Facebook and Google+, in addition to using the Adults UCI dataset.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Das:2016:MAA, author = "Sanmay Das and Allen Lavoie and Malik Magdon-Ismail", title = "Manipulation among the Arbiters of Collective Intelligence: How {Wikipedia} Administrators Mold Public Opinion", journal = j-TWEB, volume = "10", number = "4", pages = "24:1--24:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3001937", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Our reliance on networked, collectively built information is a vulnerability when the quality or reliability of this information is poor. Wikipedia, one such collectively built information source, is often our first stop for information on all kinds of topics; its quality has stood up to many tests, and it prides itself on having a ``neutral point of view.'' Enforcement of neutrality is in the hands of comparatively few, powerful administrators. In this article, we document that a surprisingly large number of editors change their behavior and begin focusing more on a particular controversial topic once they are promoted to administrator status. The conscious and unconscious biases of these few, but powerful, administrators may be shaping the information on many of the most sensitive topics on Wikipedia; some may even be explicitly infiltrating the ranks of administrators in order to promote their own points of view. In addition, we ask whether administrators who change their behavior in this suspicious manner can be identified in advance. 
Neither prior history nor vote counts during an administrator's election are useful in doing so, but we find that an alternative measure, which gives more weight to influential voters, can successfully reject these suspicious candidates. This second result has important implications for how we harness collective intelligence: even if wisdom exists in a collective opinion (like a vote), that signal can be lost unless we carefully distinguish the true expert voter from the noisy or manipulative voter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mukherjee:2017:ISV, author = "Partha Mukherjee and Bernard J. Jansen", title = "Information Sharing by Viewers Via Second Screens for In-Real-Life Events", journal = j-TWEB, volume = "11", number = "1", pages = "1:1--1:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3009970", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The use of second screen devices with social media facilitates conversational interaction concerning broadcast media events, creating what we refer to as the social soundtrack. In this research, we evaluate the change of the Super Bowl XLIX social soundtrack across three social media platforms on the topical categories of commercials, music, and game at three game phases ( Pre, During, and Post ). We perform statistical analysis on more than 3M, 800K, and 50K posts from Twitter, Instagram, and Tumblr, respectively. Findings show that the volume of posts in the During phase is fewer compared to Pre and Post phases; however, the hourly mean in the During phase is considerably higher than it is in the other two phases. 
We identify the predominant phase and category of interaction across all three social media sites. We also determine the significance of change in absolute scale across the Super Bowl categories (commercials, music, game) and in both absolute and relative scales across Super Bowl phases ( Pre, During, Post ) for the three social network platforms (Twitter, Tumblr, Instagram). Results show that significant phase-category relationships exist for all three social networks. The results identify the During phase as the predominant one for all three categories on all social media sites with respect to the absolute volume of conversations in a continuous scale. From the relative volume perspective, the During phase is highest for the music category for most social networks. For the commercials and game categories, however, the Post phase is higher than the During phase for Twitter and Instagram, respectively. Regarding category identification, the game category is the highest for Twitter and Instagram but not for Tumblr, which has dominant peaks for music and/or commercials in all three phases. It is apparent that different social media platforms offer various phase and category affordances. These results are important in identifying the influence that second screen technology has on information sharing across different social media platforms and indicates that the viewer role is transitioning from passive to more active.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Squicciarini:2017:TAO, author = "Anna Squicciarini and Cornelia Caragea and Rahul Balakavi", title = "Toward Automated Online Photo Privacy", journal = j-TWEB, volume = "11", number = "1", pages = "2:1--2:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983644", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online photo sharing is an increasingly popular activity for Internet users. More and more users are now constantly sharing their images in various social media, from social networking sites to online communities, blogs, and content sharing sites. In this article, we present an extensive study exploring privacy and sharing needs of users' uploaded images. We develop learning models to estimate adequate privacy settings for newly uploaded images, based on carefully selected image-specific features. Our study investigates both visual and textual features of images for privacy classification. We consider both basic image-specific features, commonly used for image processing, as well as more sophisticated and abstract visual features. Additionally, we include a visual representation of the sentiment evoked by images. To our knowledge, sentiment has never been used in the context of image classification for privacy purposes. We identify the smallest set of features, that by themselves or combined together with others, can perform well in properly predicting the degree of sensitivity of users' images. We consider both the case of binary privacy settings (i.e., public, private), as well as the case of more complex privacy options, characterized by multiple sharing options. 
Our results show that with few carefully selected features, one may achieve high accuracy, especially when high-quality tags are available.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kang:2017:EMA, author = "Jeon-Hyung Kang and Kristina Lerman", title = "Effort Mediates Access to Information in Online Social Networks", journal = j-TWEB, volume = "11", number = "1", pages = "3:1--3:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990506", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Individuals' access to information in a social network depends on how it is distributed and where in the network individuals position themselves. In addition, individuals vary in how much effort they invest in managing their social connections. Using data from a social media site, we study how the interplay between effort and network position affects social media users' access to diverse and novel information. Previous studies of the role of networks in information access were limited in their ability to measure the diversity of information. We address this problem by learning the topics of interest to social media users from the messages they share online with followers. We use the learned topics to measure the diversity of information users receive from the people they follow online. We confirm that users in structurally diverse network positions, which bridge otherwise disconnected regions of the follower network, tend to be exposed to more diverse and novel information. 
We also show that users who invest more effort in their activity on the site are not only located in more structurally diverse positions within the network than the less engaged users but also receive more novel and diverse information when in similar network positions. These findings indicate that the relationship between network structure and access to information in networks is more nuanced than previously thought.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Seneviratne:2017:SMA, author = "Suranga Seneviratne and Aruna Seneviratne and Mohamed Ali Kaafar and Anirban Mahanti and Prasant Mohapatra", title = "Spam Mobile Apps: Characteristics, Detection, and in the Wild Analysis", journal = j-TWEB, volume = "11", number = "1", pages = "4:1--4:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007901", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The increased popularity of smartphones has attracted a large number of developers to offer various applications for the different smartphone platforms via the respective app markets. One consequence of this popularity is that the app markets are also becoming populated with spam apps. These spam apps reduce the users' quality of experience and increase the workload of app market operators to identify these apps and remove them. Spam apps can come in many forms such as apps not having a specific functionality, those having unrelated app descriptions or unrelated keywords, or similar apps being made available several times and across diverse categories. Market operators maintain antispam policies and apps are removed through continuous monitoring. 
Through a systematic crawl of a popular app market and by identifying apps that were removed over a period of time, we propose a method to detect spam apps solely using app metadata available at the time of publication. We first propose a methodology to manually label a sample of removed apps, according to a set of checkpoint heuristics that reveal the reasons behind removal. This analysis suggests that approximately 35\% of the apps being removed are very likely to be spam apps. We then map the identified heuristics to several quantifiable features and show how distinguishing these features are for spam apps. We build an Adaptive Boost classifier for early identification of spam apps using only the metadata of the apps. Our classifier achieves an accuracy of over 95\% with precision varying between 85\% and 95\% and recall varying between 38\% and 98\%. We further show that a limited number of features, in the range of 10--30, generated from app metadata is sufficient to achieve a satisfactory level of performance. On a set of 180,627 apps that were present at the app market during our crawl, our classifier predicts 2.7\% of the apps as potential spam. Finally, we perform additional manual verification and show that human reviewers agree with 82\% of our classifier predictions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Uribe:2017:UWP, author = "Silvia Uribe and Federico {\'A}lvarez and Jos{\'e} Manuel Men{\'e}ndez", title = "User's {Web} Page Aesthetics Opinion: a Matter of Low-Level Image Descriptors Based on {MPEG-7}", journal = j-TWEB, volume = "11", number = "1", pages = "5:1--5:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3019595", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Analyzing a user's first impression of a Web site is essential for interface designers, as it is tightly related to their overall opinion of a site. In fact, this early evaluation affects user navigation behavior. Perceived usability and user interest (e.g., revisiting and recommending the site) are parameters influenced by first opinions. Thus, predicting the latter when creating a Web site is vital to ensure users' acceptance. In this regard, Web aesthetics is one of the most influential factors in this early perception. We propose the use of low-level image parameters for modeling Web aesthetics in an objective manner, which is an innovative research field. Our model, obtained by applying a stepwise multiple regression algorithm, infers a user's first impression by analyzing three different visual characteristics of Web site screenshots---texture, luminance, and color---which are directly derived from MPEG-7 descriptors.
The results obtained over three wide Web site datasets (composed by 415, 42, and 6 Web sites, respectively) reveal a high correlation between low-level parameters and the users' evaluation, thus allowing a more precise and objective prediction of users' opinion than previous models that are based on other image characteristics with fewer predictors. Therefore, our model is meant to support a rapid assessment of Web sites in early stages of the design process to maximize the likelihood of the users' final approval.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Parra-Arnau:2017:MBT, author = "Javier Parra-Arnau and Jagdish Prasad Achara and Claude Castelluccia", title = "{MyAdChoices}: Bringing Transparency and Control to Online Advertising", journal = j-TWEB, volume = "11", number = "1", pages = "7:1--7:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996466", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 3 11:10:10 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The intrusiveness and the increasing invasiveness of online advertising have, in the last few years, raised serious concerns regarding user privacy and Web usability. As a reaction to these concerns, we have witnessed the emergence of a myriad of ad-blocking and antitracking tools, whose aim is to return control to users over advertising. The problem with these technologies, however, is that they are extremely limited and radical in their approach: users can only choose either to block or allow all ads. With around 200 million people regularly using these tools, the economic model of the Web---in which users get content free in return for allowing advertisers to show them ads---is at serious peril.
In this article, we propose a smart Web technology that aims at bringing transparency to online advertising, so that users can make an informed and equitable decision regarding ad blocking. The proposed technology is implemented as a Web-browser extension and enables users to exert fine-grained control over advertising, thus providing them with certain guarantees in terms of privacy and browsing experience, while preserving the Internet economic model. Experimental results in a real environment demonstrate the suitability and feasibility of our approach, and provide preliminary findings on behavioral targeting from real user browsing profiles.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2017:VMC, author = "Tianyi Wang and Gang Wang and Bolun Wang and Divya Sambasivan and Zengbin Zhang and Xing Li and Haitao Zheng and Ben Y. Zhao", title = "Value and Misinformation in Collaborative Investing Platforms", journal = j-TWEB, volume = "11", number = "2", pages = "8:1--8:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3027487", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It is often difficult to separate the highly capable ``experts'' from the average worker in crowdsourced systems. This is especially true for challenge application domains that require extensive domain knowledge. The problem of stock analysis is one such domain, where even the highly paid, well-educated domain experts are prone to make mistakes. As an extremely challenging problem space, the ``wisdom of the crowds'' property that many crowdsourced applications rely on may not hold. 
In this article, we study the problem of evaluating and identifying experts in the context of SeekingAlpha and StockTwits, two crowdsourced investment services that have recently begun to encroach on a space dominated for decades by large investment banks. We seek to understand the quality and impact of content on collaborative investment platforms, by empirically analyzing complete datasets of SeekingAlpha articles (9 years) and StockTwits messages (4 years). We develop sentiment analysis tools and correlate contributed content to the historical performance of relevant stocks. While SeekingAlpha articles and StockTwits messages provide minimal correlation to stock performance in aggregate, a subset of experts contribute more valuable (predictive) content. We show that these authors can be easily identified by user interactions, and investments based on their analysis significantly outperform broader markets. This effectively shows that even in challenging application domains, there is a secondary or indirect wisdom of the crowds. Finally, we conduct a user survey that sheds light on users' views of SeekingAlpha content and stock manipulation. We also devote efforts to identify potential manipulation of stocks by detecting authors controlling multiple identities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Drutsa:2017:PUE, author = "Alexey Drutsa and Gleb Gusev and Pavel Serdyukov", title = "Periodicity in User Engagement with a Search Engine and Its Application to Online Controlled Experiments", journal = j-TWEB, volume = "11", number = "2", pages = "9:1--9:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2856822", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Nowadays, billions of people use the Web in connection with their daily needs. A significant part of these needs are constituted by search tasks that are usually addressed by search engines. Thus, daily search needs result in regular user engagement with a search engine. User engagement with web services was studied in various aspects, but there appears to be little work devoted to its regularity and periodicity. In this article, we study periodicity of user engagement with a popular search engine through applying spectrum analysis to temporal sequences of different engagement metrics. First, we found periodicity patterns of user engagement and revealed classes of users whose periodicity patterns do not change over a long period of time. In addition, we give an exhaustive analysis of the stability and quality of identified clusters. Second, we used the spectrum series as key metrics to evaluate search quality. We found that the novel periodicity metrics outperform the state-of-the-art quality metrics both in terms of significance level ( p -value) and sensitivity to a large set of large-scale A/B experiments conducted on real search engine users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rahman:2017:AAC, author = "M. Rezaur Rahman and Jinyoung Han and Yong Jae Lee and Chen-Nee Chuah", title = "Analyzing the Adoption and Cascading Process of {OSN}-Based Gifting Applications: an Empirical Study", journal = j-TWEB, volume = "11", number = "2", pages = "10:1--10:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3023871", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "To achieve growth in the user base of online social networks--(OSN) based applications, word-of-mouth diffusion mechanisms, such as user-to-user invitations, are widely used. This article characterizes the adoption and cascading process of OSN-based applications that grow via user invitations. We analyze a detailed large-scale dataset of a popular Facebook gifting application, iHeart, that contains more than 2 billion entries of user activities generated by 190 million users during a span of 64 weeks. We investigate (1) how users invite their friends to an OSN-based application, (2) how application adoption of an individual user can be predicted, (3) what factors drive the cascading process of application adoptions, and (4) what are the good predictors of the ultimate cascade sizes. We find that sending or receiving a large number of invitations does not necessarily help to recruit new users to iHeart. We also find that the average success ratio of inviters is the most important feature in predicting an adoption of an individual user, which indicates that the effectiveness of inviters has strong predictive power with respect to application adoption. 
Based on the lessons learned from our analyses, we build and evaluate learning-based models to predict whether a user will adopt iHeart. Our proposed model that utilizes additional activity information of individual users from other similar types of gifting applications can achieve high precision (83\%) in predicting adoptions in the target application (i.e., iHeart). We next identify a set of distinctive features that are good predictors of the growth of the application adoptions in terms of final population size. We finally propose a prediction model to infer whether a cascade of application adoption will continue to grow in the future based on observing the initial adoption process. Results show that our proposed model can achieve high precision (over 80\%) in predicting large cascades of application adoptions. We believe our work can give an important implication in resource allocation of OSN-based product stakeholders, for example, via targeted marketing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Matsubara:2017:NDI, author = "Yasuko Matsubara and Yasushi Sakurai and B. Aditya Prakash and Lei Li and Christos Faloutsos", title = "Nonlinear Dynamics of Information Diffusion in Social Networks", journal = j-TWEB, volume = "11", number = "2", pages = "11:1--11:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057741", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/string-matching.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The recent explosion in the adoption of search engines and new media such as blogs and Twitter have facilitated the faster propagation of news and rumors. 
How quickly does a piece of news spread over these media? How does its popularity diminish over time? Does the rising and falling pattern follow a simple universal law? In this article, we propose SpikeM, a concise yet flexible analytical model of the rise and fall patterns of information diffusion. Our model has the following advantages. First, unification power: it explains earlier empirical observations and generalizes theoretical models including the SI and SIR models. We provide the threshold of the take-off versus die-out conditions for SpikeM and discuss the generality of our model by applying it to an arbitrary graph topology. Second, practicality: it matches the observed behavior of diverse sets of real data. Third, parsimony: it requires only a handful of parameters. Fourth, usefulness: it makes it possible to perform analytic tasks such as forecasting, spotting anomalies, and interpretation by reverse engineering the system parameters of interest (quality of news, number of interested bloggers, etc.). We also introduce an efficient and effective algorithm for the real-time monitoring of information diffusion, namely SpikeStream, which identifies multiple diffusion patterns in a large collection of online event streams. Extensive experiments on real datasets demonstrate that SpikeM accurately and succinctly describes all patterns of the rise and fall spikes in social networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rojas-Galeano:2017:OOO, author = "Sergio Rojas-Galeano", title = "On Obstructing Obscenity Obfuscation", journal = j-TWEB, volume = "11", number = "2", pages = "12:1--12:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3032963", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Obscenity (the use of rude words or offensive expressions) has spread from informal verbal conversations to digital media, becoming increasingly common on user-generated comments found in Web forums, newspaper user boards, social networks, blogs, and media-sharing sites. The basic obscenity-blocking mechanism is based on verbatim comparisons against a blacklist of banned vocabulary; however, creative users circumvent these filters by obfuscating obscenity with symbol substitutions or bogus segmentations that still visually preserve the original semantics, such as writing shit as {\em \$h`!t\/} or {\em s.h.i.t\/} or even worse mixing them as {\em \$.h....`!.t\/}. The number of potential obfuscated variants is combinatorial, yielding the verbatim filter impractical. Here we describe a method intended to obstruct this anomaly inspired by sequence alignment algorithms used in genomics, coupled with a tailor-made edit penalty function. The method only requires to set up the vocabulary of plain obscenities; no further training is needed. Its complexity on screening a single obscenity is linear, both in runtime and memory, on the length of the user-generated text. We validated the method on three different experiments. 
The first one involves a new dataset that is also introduced in this article; it consists of a set of manually annotated real-life comments in Spanish, gathered from the news user boards of an online newspaper, containing this type of obfuscation. The second one is a publicly available dataset of comments in Portuguese from a sports Web site. In these experiments, at the obscenity level, we observed recall rates greater than 90\%, whereas precision rates varied between 75\% and 95\%, depending on their sequence length (shorter lengths yielded a higher number of false alarms). On the other hand, at the comment level, we report recall of 86\%, precision of 91\%, and specificity of 98\%. The last experiment revealed that the method is more effective in matching this type of obfuscation compared to the classical Levenshtein edit distance. We conclude discussing the prospects of the method to help enforcing moderation rules of obscenity expressions or as a preprocessing mechanism for sequence cleaning and/or feature extraction in more sophisticated text categorization techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Xu:2017:EIE, author = "Haitao Xu and Daiping Liu and Haining Wang and Angelos Stavrou", title = "An Empirical Investigation of Ecommerce-Reputation-Escalation-as-a-Service", journal = j-TWEB, volume = "11", number = "2", pages = "13:1--13:??", month = may, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:38 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In online markets, a store's reputation is closely tied to its profitability. 
Sellers' desire to quickly achieve a high reputation has fueled a profitable underground business that operates as a specialized crowdsourcing marketplace and accumulates wealth by allowing online sellers to harness human laborers to conduct fake transactions to improve their stores' reputations. We term such an underground market a seller-reputation-escalation (SRE) market. In this article, we investigate the impact of the SRE service on reputation escalation by performing in-depth measurements of the prevalence of the SRE service, the business model and market size of SRE markets, and the characteristics of sellers and offered laborers. To this end, we have infiltrated five SRE markets and studied their operations using daily data collection over a continuous period of 2 months. We identified more than 11,000 online sellers posting at least 219,165 fake-purchase tasks on the five SRE markets. These transactions earned at least \$46,438 in revenue for the five SRE markets, and the total value of merchandise involved exceeded \$3,452,530. Our study demonstrates that online sellers using the SRE service can increase their stores' reputations at least 10 times faster than legitimate ones while about 25\% of them were visibly penalized. Even worse, we found a much stealthier and more hazardous service that can, within a single day, boost a seller's reputation by such a degree that would require a legitimate seller at least a year to accomplish. Armed with our analysis of the operational characteristics of the underground economy, we offer some insights into potential mitigation strategies. Finally, we revisit the SRE ecosystem 1 year later to evaluate the latest dynamism of the SRE markets, especially the statuses of the online stores once identified to launch fake-transaction campaigns on the SRE markets. 
We observe that the SRE markets are not as active as they were 1 year ago and about 17\% of the involved online stores become inaccessible likely because they have been forcibly shut down by the corresponding E-commerce marketplace for conducting fake transactions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Singer:2017:BMC, author = "Philipp Singer and Denis Helic and Andreas Hotho and Markus Strohmaier", title = "A {Bayesian} Method for Comparing Hypotheses About Human Trails", journal = j-TWEB, volume = "11", number = "3", pages = "14:1--14:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054950", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "When users interact with the Web today, they leave sequential digital trails on a massive scale. Examples of such human trails include Web navigation, sequences of online restaurant reviews, or online music play lists. Understanding the factors that drive the production of these trails can be useful, for example, for improving underlying network structures, predicting user clicks, or enhancing recommendations. In this work, we present a method called HypTrails for comparing a set of hypotheses about human trails on the Web, where hypotheses represent beliefs about transitions between states. Our method utilizes Markov chain models with Bayesian inference. The main idea is to incorporate hypotheses as informative Dirichlet priors and to calculate the evidence of the data under them. For eliciting Dirichlet priors from hypotheses, we present an adaption of the so-called (trial) roulette method, and to compare the relative plausibility of hypotheses, we employ Bayes factors. 
We demonstrate the general mechanics and applicability of HypTrails by performing experiments with (i) synthetic trails for which we control the mechanisms that have produced them and (ii) empirical trails stemming from different domains including Web site navigation, business reviews, and online music played. Our work expands the repertoire of methods available for studying human trails.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vahedian:2017:MRH, author = "Fatemeh Vahedian and Robin Burke and Bamshad Mobasher", title = "Multirelational Recommendation in Heterogeneous Networks", journal = j-TWEB, volume = "11", number = "3", pages = "15:1--15:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054952", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recommender systems are key components in information-seeking contexts where personalization is sought. However, the dominant framework for recommendation is essentially two dimensional, with the interaction between users and items characterized by a single relation. In many cases, such as social networks, users and items are joined in a complex web of relations, not readily reduced to a single value. Recent multirelational approaches to recommendation focus on the direct, proximal relations in which users and items may participate. Our approach uses the framework of complex heterogeneous networks to represent such recommendation problems. We propose the weighted hybrid of low-dimensional recommenders (WHyLDR) recommendation model, which uses extended relations, represented as constrained network paths, to effectively augment direct relations. 
This model incorporates influences from both distant and proximal connections in the network. The WHyLDR approach raises the problem of the unconstrained proliferation of components, built from ever-extended network paths. We show that although component utility is not strictly monotonic with path length, a measure based on information gain can effectively prune and optimize such hybrids.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sariyuce:2017:NDI, author = "Ahmet Erdem Sariy{\"u}ce and C. Seshadhri and Ali Pinar and {\"U}mit V. {\c{C}}ataly{\"u}rek", title = "Nucleus Decompositions for Identifying Hierarchy of Dense Subgraphs", journal = j-TWEB, volume = "11", number = "3", pages = "16:1--16:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057742", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Finding dense substructures in a graph is a fundamental graph mining operation, with applications in bioinformatics, social networks, and visualization to name a few. Yet most standard formulations of this problem (like clique, quasi-clique, densest at-least-$k$ subgraph) are NP-hard. Furthermore, the goal is rarely to find the ``true optimum'' but to identify many (if not all) dense substructures, understand their distribution in the graph, and ideally determine relationships among them. Current dense subgraph finding algorithms usually optimize some objective and only find a few such subgraphs without providing any structural relations. We define the nucleus decomposition of a graph, which represents the graph as a forest of nuclei. Each nucleus is a subgraph where smaller cliques are present in many larger cliques. 
The forest of nuclei is a hierarchy by containment, where the edge density increases as we proceed towards leaf nuclei. Sibling nuclei can have limited intersections, which enables discovering overlapping dense subgraphs. With the right parameters, the nucleus decomposition generalizes the classic notions of $k$-core and $k$-truss decompositions. We present practical algorithms for nucleus decompositions and empirically evaluate their behavior in a variety of real graphs. The tree of nuclei consistently gives a global, hierarchical snapshot of dense substructures and outputs dense subgraphs of comparable quality with the state-of-the-art solutions that are dense and have non-trivial sizes. Our algorithms can process real-world graphs with tens of millions of edges in less than an hour. We demonstrate how proposed algorithms can be utilized on a citation network. Our analysis showed that dense units identified by our algorithms correspond to coherent articles on a specific area. Our experiments also show that we can identify dense structures that are lost within larger structures by other methods and find further finer grain structure within dense groups.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Kanza:2017:LBD, author = "Yaron Kanza and Elad Kravi and Eliyahu Safra and Yehoshua Sagiv", title = "Location-Based Distance Measures for Geosocial Similarity", journal = j-TWEB, volume = "11", number = "3", pages = "17:1--17:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3054951", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article investigates the problem of geosocial similarity among users of online social networks, based on the locations of their activities (e.g., posting messages or photographs). Finding pairs of geosocially similar users or detecting that two sets of locations (of activities) belong to the same user has important applications in privacy protection, recommendation systems, urban planning, and public health, among others. It is explained and shown empirically that common distance measures between sets of locations are inadequate for determining geosocial similarity. Two novel distance measures between sets of locations are introduced. One is the mutually nearest distance that is based on computing a matching between two sets. The second measure uses a quad-tree index. It is highly scalable but incurs the overhead of creating and maintaining the index. Algorithms with optimization techniques are developed for computing the two distance measures and also for finding the $k$-most-similar users of a given one. Extensive experiments, using geotagged messages from Twitter, show that the new distance measures are both more accurate and more efficient than existing ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Panagopoulos:2017:MER, author = "A. Panagopoulos and E. Koutrouli and A. Tsalgatidou", title = "Modeling and Evaluating a Robust Feedback-Based Reputation System for E-Commerce Platforms", journal = j-TWEB, volume = "11", number = "3", pages = "18:1--18:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3057265", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Despite the steady growth of e-commerce communities in the past two decades, little has changed in the way these communities manage reputation for building trust and for protecting their members' financial interests against fraud. As these communities mature and the defects of their reputation systems are revealed, further potential for deception against their members is created, that pushes the need for novel reputation mechanisms. Although a high volume of research works has explored the concepts of reputation and trust in e-communities, most of the proposed reputation systems target decentralized e-communities, focusing on issues related with the decentralized reputation management; they have not thus been integrated in e-commerce platforms. This work's objective is to provide an attack-resilient feedback-based reputation system for modern e-commerce platforms, while minimizing the incurred financial burden of potent security schemes. Initially, we discuss a series of attacks and issues in reputation systems and study the different approaches of these problems from related works, while also considering the structural properties, defense mechanisms and policies of existing platforms. 
Then we present our proposition for a robust reputation system which consists of a novel reputation metric and attack prevention mechanisms. Finally, we describe the simulation framework and tool that we have implemented for thoroughly testing and evaluating the metric's resilience against attacks and present the evaluation experiments and their results. We consider the presented simulation framework as the second contribution of our article, aiming at facilitating the simulation and elaborate evaluation of reputation systems which specifically target e-commerce platforms by thoroughly presenting it, exhibiting its usage and making it available to the research community.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bianchini:2017:WMD, author = "Devis Bianchini and Valeria {De Antonellis} and Michele Melchiori", title = "{WISeR}: a Multi-Dimensional Framework for Searching and Ranking {Web APIs}", journal = j-TWEB, volume = "11", number = "3", pages = "19:1--19:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3061710", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 13 14:33:39 MDT 2017", bibsource = "http://portal.acm.org/; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Mashups are agile applications that aggregate RESTful services, developed by third parties, whose functions are exposed as Web Application Program Interfaces (APIs) within public repositories. From mashups developers' viewpoint, Web API search may benefit from selection criteria that combine several dimensions used to describe the APIs, such as categories, tags, and technical features (e.g., protocols and data formats). Nevertheless, other dimensions might be fruitfully exploited to support Web API search. 
Among them, past API usage experiences by other developers may be used to suggest the right APIs for a target application. Past experiences might emerge from the co-occurrence of Web APIs in the same mashups. Ratings assigned by developers after using the Web APIs to create their own mashups or after using mashups developed by others can be considered as well. This article aims to advance the current state of the art for Web API search and ranking from mashups developers' point of view, by addressing two key issues: multi-dimensional modeling and multi-dimensional framework for selection. The model for Web API characterization embraces multiple descriptive dimensions, by considering several public repositories, that focus on different and only partially overlapping dimensions. The proposed Web API selection framework, called WISeR (Web apI Search and Ranking), is based on functions devoted to developers to exploit the multi-dimensional descriptions, in order to enhance the identification of candidate Web APIs to be proposed, according to the given requirements. Furthermore, WISeR adapts to changes that occur during the Web API selection and mashup development, by revising the dimensional attributes in order to conform to developers' preferences and constraints. We also present an experimental evaluation of the framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rocha:2017:LPL, author = "Andr{\'e} Rocha and C{\'a}ssio Prazeres", title = "{LDoW--PaN}: Linked Data on the {Web}-Presentation and Navigation", journal = j-TWEB, volume = "11", number = "4", pages = "20:1--20:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983643", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This work aimed to propose LDoW-PaN, a Linked Data presentation and navigation model focused on the average user. The LDoW-PaN model is an extension of the Dexter Hypertext Reference Model. Through the LDoW-PaN model, ordinary people-who have no experience with technologies that involve the Linked Data environment-can interact with the Web of Data (RDF) more closely related to how they interact with the Web of Documents (HTML). To evaluate the proposal, some tools were developed, including the following: (i) a Web Service, which implements the lower-level layers of the LDoW-PaN model; (ii) a client-side script library, which implements the presentation and navigation layer; and (iii) a browser extension, which uses these tools to provide Linked Data presentation and navigation to users browsing the Web. The browser extension was developed using user interface approaches that are well known, well accepted, and evaluated by the Web research community, such as faceted navigation and presentation through tooltips. Therefore, the prototype evaluation included: usability evaluation through two classical techniques; computational complexity measures; and an analysis of the performance of the operations provided by the proposed model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2017:CUB, author = "Gang Wang and Xinyi Zhang and Shiliang Tang and Christo Wilson and Haitao Zheng and Ben Y. Zhao", title = "Clickstream User Behavior Models", journal = j-TWEB, volume = "11", number = "4", pages = "21:1--21:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068332", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The next generation of Internet services is driven by users and user-generated content. The complex nature of user behavior makes it highly challenging to manage and secure online services. On one hand, service providers cannot effectively prevent attackers from creating large numbers of fake identities to disseminate unwanted content (e.g., spam). On the other hand, abusive behavior from real users also poses significant threats (e.g., cyberbullying). In this article, we propose clickstream models to characterize user behavior in large online services. By analyzing clickstream traces (i.e., sequences of click events from users), we seek to achieve two goals: (1) detection: to capture distinct user groups for the detection of malicious accounts, and (2) understanding: to extract semantic information from user groups to understand the captured behavior. To achieve these goals, we build two related systems. The first one is a semisupervised system to detect malicious user accounts (Sybils). The core idea is to build a clickstream similarity graph where each node is a user and an edge captures the similarity of two users' clickstreams. Based on this graph, we propose a coloring scheme to identify groups of malicious accounts without relying on a large labeled dataset. 
We validate the system using ground-truth clickstream traces of 16,000 real and Sybil users from Renren, a large Chinese social network. The second system is an unsupervised system that aims to capture and understand the fine-grained user behavior. Instead of binary classification (malicious or benign), this model identifies the natural groups of user behavior and automatically extracts features to interpret their semantic meanings. Applying this system to Renren and another online social network, Whisper (100K users), we help service providers identify unexpected user behaviors and even predict users' future actions. Both systems received positive feedback from our industrial collaborators including Renren, LinkedIn, and Whisper after testing on their internal clickstream data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Hogan:2017:CFI, author = "Aidan Hogan", title = "Canonical Forms for Isomorphic and Equivalent {RDF} Graphs: Algorithms for Leaning and Labelling Blank Nodes", journal = j-TWEB, volume = "11", number = "4", pages = "22:1--22:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068333", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Existential blank nodes greatly complicate a number of fundamental operations on Resource Description Framework (RDF) graphs. In particular, the problems of determining if two RDF graphs have the same structure modulo blank node labels (i.e., if they are isomorphic ), or determining if two RDF graphs have the same meaning under simple semantics (i.e., if they are simple-equivalent ), have no known polynomial-time algorithms. 
In this article, we propose methods that can produce two canonical forms of an RDF graph. The first canonical form preserves isomorphism such that any two isomorphic RDF graphs will produce the same canonical form; this iso-canonical form is produced by modifying the well-known canonical labelling algorithm Nauty for application to RDF graphs. The second canonical form additionally preserves simple-equivalence such that any two simple-equivalent RDF graphs will produce the same canonical form; this equi-canonical form is produced by, in a preliminary step, leaning the RDF graph, and then computing the iso-canonical form. These algorithms have a number of practical applications, such as for identifying isomorphic or equivalent RDF graphs in a large collection without requiring pairwise comparison, for computing checksums or signing RDF graphs, for applying consistent Skolemisation schemes where blank nodes are mapped in a canonical manner to Internationalised Resource Identifiers (IRIs), and so forth. Likewise a variety of algorithms can be simplified by presupposing RDF graphs in one of these canonical forms. Both algorithms require exponential steps in the worst case; in our evaluation we demonstrate that there indeed exist difficult synthetic cases, but we also provide results over 9.9 million RDF graphs that suggest such cases occur infrequently in the real world, and that both canonical forms can be efficiently computed in all but a handful of such cases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Koutrika:2017:SWP, author = "Georgia Koutrika and Qian Lin", title = "A Study of {Web} Print: What People Print in the Digital Era", journal = j-TWEB, volume = "11", number = "4", pages = "23:1--23:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068331", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article analyzes a proprietary log of printed web pages and aims at answering questions regarding the content people print (what), the reasons they print (why), as well as attributes of their print profile (who). We present a classification of pages printed based on their print intent and we describe our methodology for processing the print dataset used in this study. In our analysis, we study the web sites, topics, and print intent of the pages printed along the following aspects: popularity, trends, activity, user diversity, and consistency. We present several findings that reveal interesting insights into printing. We analyze our findings and discuss their impact and directions for future work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Bernaschi:2017:EAT, author = "Massimo Bernaschi and Alessandro Celestini and Stefano Guarino and Flavio Lombardi", title = "Exploring and Analyzing the {Tor} Hidden Services Graph", journal = j-TWEB, volume = "11", number = "4", pages = "24:1--24:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3008662", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The exploration and analysis of Web graphs has flourished in the recent past, producing a large number of relevant and interesting research results. However, the unique characteristics of the Tor network limit the applicability of standard techniques and demand for specific algorithms to explore and analyze it. The attention of the research community has focused on assessing the security of the Tor infrastructure (i.e., its ability to actually provide the intended level of anonymity) and on discussing what Tor is currently being used for. Since there are no foolproof techniques for automatically discovering Tor hidden services, little or no information is available about the topology of the Tor Web graph. Even less is known on the relationship between content similarity and topological structure. The present article aims at addressing such lack of information. Among its contributions: a study on automatic Tor Web exploration/data collection approaches; the adoption of novel representative metrics for evaluating Tor data; a novel in-depth analysis of the hidden services graph; a rich correlation analysis of hidden services' semantics and topology. 
Finally, a broad interesting set of novel insights/considerations over the Tor Web organization and content are provided.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Xu:2017:COF, author = "Chang Xu and Jie Zhang", title = "Collusive Opinion Fraud Detection in Online Reviews: a Probabilistic Modeling Approach", journal = j-TWEB, volume = "11", number = "4", pages = "25:1--25:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3098859", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We address the collusive opinion fraud problem in online review portals, where groups of people work together to deliver deceptive reviews for manipulating the reputations of targeted items. Such collusive fraud is considered much harder to defend against, since the participants (or colluders) can evade detection by shaping their behaviors collectively so as not to appear suspicious. To alleviate this problem, countermeasures have been proposed that leverage the collective behaviors of colluders. The motivation stems from the observation that colluders typically act in a very synchronized way, as they are instructed by the same campaigns with common items to target and schedules to follow. However, the collective behaviors examined in existing solutions focus mostly on the external appearance of fraud campaigns, such as the campaign size and the size of the targeted item set. These signals may become ineffective once colluders have changed their behaviors collectively. 
Moreover, the detection algorithms used in existing approaches are designed to only make collusion inference on the input data; predictive models that can be deployed for detecting emerging fraud cannot be learned from the data. In this article, to complement existing studies on collusive opinion fraud characterization and detection, we explore more subtle behavioral trails in collusive fraud practice. In particular, a suite of homogeneity-based measures are proposed to capture the interrelationships among colluders within campaigns. Moreover, a novel statistical model is proposed to further characterize, recognize, and predict collusive fraud in online reviews. The proposed model is fully unsupervised and highly flexible to incorporate effective measures available for better modeling and prediction. Through experiments on two real-world datasets, we show that our method outperforms the state of the art in both characterization and detection abilities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "25", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chattopadhyay:2017:FSM, author = "Soumi Chattopadhyay and Ansuman Banerjee and Nilanjan Banerjee", title = "A Fast and Scalable Mechanism for {Web} Service Composition", journal = j-TWEB, volume = "11", number = "4", pages = "26:1--26:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3098884", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Jan 15 08:22:45 MST 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In recent times, automated business processes and web services have become ubiquitous in diverse application spaces. 
Efficient composition of web services in real time while providing necessary Quality of Service (QoS) guarantees is a computationally complex problem and several heuristic based approaches have been proposed to compose the services optimally. In this article, we present the design of a scalable QoS-aware service composition mechanism that balances the computational complexity of service composition with the QoS guarantees of the composed service and achieves scalability. Our design guarantees a single QoS parameter using an intelligent search and pruning mechanism in the composed service space. We also show that our methodology yields near optimal solutions on real benchmarks. We then enhance our proposed mechanism to guarantee multiple QoS parameters using aggregation techniques. Finally, we explore search time versus solution quality tradeoff using parameterized search algorithms that produce better-quality solutions at the cost of delay. We present experimental results to show the efficiency of our proposed mechanism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "26", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{He:2018:EET, author = "Ming He and Yong Ge and Enhong Chen and Qi Liu and Xuesong Wang", title = "Exploring the Emerging Type of Comment for Online Videos: {DanMu}", journal = j-TWEB, volume = "12", number = "1", pages = "1:1--1:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3098885", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "DanMu, an emerging type of user-generated comment, has become increasingly popular in recent years. Many online video platforms such as Tudou.com have provided the DanMu function. 
Unlike traditional online reviews such as reviews at Youtube.com that are outside the videos, DanMu is a scrolling marquee comment, which is overlaid directly on top of the video and synchronized to a specific playback time. Such comments are displayed as streams of moving subtitles overlaid on the video screen. Viewers could easily write DanMu s while watching videos, and the written DanMu s will be immediately overlaid onto the video and displayed to writers themselves and other viewers as well. Such DanMu systems have greatly enabled users to communicate with each other in a much more direct way, creating a real-time sharing experience. Although there are several unique features of DanMu and it has had a great impact on online video systems, to the best of our knowledge, there is no work that has provided a comprehensive study on DanMu. In this article, as a pilot study, we analyze the unique characteristics of DanMu from various perspectives. Specifically, we first illustrate some unique distributions of DanMu s by comparing with traditional reviews (TReviews) that we collected from a real DanMu -enabled online video system. Second, we discover two interesting patterns in DanMu data: a herding effect and multiple-burst phenomena that are significantly different from those in TReviews and reveal important insights about the growth of DanMu s on a video. Towards exploring antecedents of both the herding effect and multiple-burst phenomena, we propose to further detect leading DanMu s within bursts, because those leading DanMu s make the most contribution to both patterns. A framework is proposed to detect leading DanMu s that effectively combines multiple factors contributing to leading DanMu s. 
Based on the identified characteristics of DanMu, finally we propose to predict the distribution of future DanMu s (i.e., the growth of DanMu s), which is important for many DanMu -enabled online video systems, for example, the predicted DanMu distribution could be an indicator of video popularity. This prediction task includes two aspects: One is to predict which videos future DanMu s will be posted for, and the other one is to predict which segments of a video future DanMu s will be posted on. We develop two sophisticated models to solve both problems. Finally, intensive experiments are conducted with a real-world dataset to validate all methods developed in this article.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Minervini:2018:AKP, author = "Pasquale Minervini and Volker Tresp and Claudia D'amato and Nicola Fanizzi", title = "Adaptive Knowledge Propagation in {Web} Ontologies", journal = j-TWEB, volume = "12", number = "1", pages = "2:1--2:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105961", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We focus on the problem of predicting missing assertions in Web ontologies. We start from the assumption that individual resources that are similar in some aspects are more likely to be linked by specific relations: this phenomenon is also referred to as homophily and emerges in a variety of relational domains. In this article, we propose a method for (1) identifying which relations in the ontology are more likely to link similar individuals and (2) efficiently propagating knowledge across chains of similar individuals. 
By enforcing sparsity in the model parameters, the proposed method is able to select only the most relevant relations for a given prediction task. Our experimental evaluation demonstrates the effectiveness of the proposed method in comparison to state-of-the-art methods from the literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2018:RCW, author = "Yining Liu and Yong Liu and Yanming Shen and Keqiu Li", title = "Recommendation in a Changing World: Exploiting Temporal Dynamics in Ratings and Reviews", journal = j-TWEB, volume = "12", number = "1", pages = "3:1--3:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3108238", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Users' preferences, and consequently their ratings and reviews to items, change over time. Likewise, characteristics of items are also time-varying. By dividing data into time periods, temporal Recommender Systems (RSs) improve recommendation accuracy by exploring the temporal dynamics in user rating data. However, temporal RSs have to cope with rating sparsity in each time period. Meanwhile, reviews generated by users contain rich information about their preferences, which can be exploited to address rating sparsity and further improve the performance of temporal RSs. In this article, we develop a temporal rating model with topics that jointly mines the temporal dynamics of both user-item ratings and reviews. Studying temporal drifts in reviews helps us understand item rating evolutions and user interest changes over time. Our model also automatically splits the review text in each time period into interim words and intrinsic words. 
By linking interim words and intrinsic words to short-term and long-term item features, respectively, we jointly mine the temporal changes in user and item latent features together with the associated review text in a single learning stage. Through experiments on 28 real-world datasets collected from Amazon, we show that the rating prediction accuracy of our model significantly outperforms the existing state-of-the-art RS models. And our model can automatically identify representative interim words in each time period as well as intrinsic words across all time periods. This can be very useful in understanding the time evolution of users' preferences and items' characteristics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Tu:2018:ARP, author = "Wenting Tu and David W. Cheung and Nikos Mamoulis and Min Yang and Ziyu Lu", title = "Activity Recommendation with Partners", journal = j-TWEB, volume = "12", number = "1", pages = "4:1--4:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3121407", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recommending social activities, such as watching movies or having dinner, is a common function found in social networks or e-commerce sites. Besides certain websites which manage activity-related locations (e.g., foursquare.com), many items on product sale platforms (e.g., groupon.com) can naturally be mapped to social activities. 
For example, movie tickets can be thought of as activity items, which can be mapped as a social activity of ``watch a movie.'' Traditional recommender systems estimate the degree of interest for a target user on candidate items (or activities), and accordingly, recommend the top-$k$ activity items to the user. However, these systems ignore an important social characteristic of recommended activities: people usually tend to participate in those activities with friends. This article considers this fact for improving the effectiveness of recommendation in two directions. First, we study the problem of activity-partner recommendation; i.e., for each recommended activity item, find a suitable partner for the user. This (i) saves the user's time for finding activity partners, (ii) increases the likelihood that the activity item will be selected by the user, and (iii) improves the effectiveness of recommender systems to users overall and enkindles their social enthusiasm. Our partner recommender is built upon the users' historical attendance preferences, their social context, and geographic information. Moreover, we explore how to leverage the partner recommendation to help improve the effectiveness of recommending activities to users. Assuming that users tend to select the activities for which they can find suitable partners, we propose a partner-aware activity recommendation model, which integrates this hypothesis into conventional recommendation approaches. Finally, the recommended items not only match users' interests, but also have high chances to be selected by the users, because the users can find suitable partners to attend the corresponding activities together. We conduct experiments on real data to evaluate the effectiveness of activity-partner recommendation and partner-aware activity recommendation. 
The results verify that (i) suggesting partners greatly improves the likelihood that a recommended activity item is to be selected by the target user and (ii) considering the existence of suitable partners in the ranking of recommended items improves the accuracy of recommendation significantly.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Dutta:2018:CRM, author = "Kaushik Dutta and Debra Vandermeer", title = "Caching to Reduce Mobile App Energy Consumption", journal = j-TWEB, volume = "12", number = "1", pages = "5:1--5:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3125778", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Mobile applications consume device energy for their operations, and the fast rate of battery depletion on mobile devices poses a major usability hurdle. After the display, data communication is the second-biggest consumer of mobile device energy. At the same time, software applications that run on mobile devices represent a fast-growing product segment. Typically, these applications serve as front-end display mechanisms, which fetch data from remote servers and display the information to the user in an appropriate format-incurring significant data communication overheads in the process. In this work, we propose methods to reduce energy overheads in mobile devices due to data communication by leveraging data caching technology. A review of existing caching mechanisms revealed that they are primarily designed for optimizing response time performance and cannot be easily ported to mobile devices for energy savings. 
Further, architectural differences between traditional client-server and mobile communications infrastructures make the use of existing caching technologies unsuitable in mobile devices. In this article, we propose a set of two new caching approaches specifically designed with the constraints of mobile devices in mind: (a) a response caching approach and (b) an object caching approach. Our experiments show that, even for a small cache size of 250MB, object caching can reduce energy consumption on average by 45\% compared to the no-cache case, and response caching can reduce energy consumption by 20\% compared to the no-cache case. The benefits increase with larger cache sizes. These results demonstrate the efficacy of our proposed method and raise the possibility of significantly extending mobile device battery life.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Manta-Caro:2018:MSW, author = "Cristyan Manta-Caro and Juan M. Fern{\'a}ndez-Luna", title = "Modeling and Simulating the {Web of Things} from an Information Retrieval Perspective", journal = j-TWEB, volume = "12", number = "1", pages = "6:1--6:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3132732", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Internet and Web technologies have changed our lives in ways we are not yet fully aware of. In the near future, Internet will interconnect more than 50 billion things in the real world, nodes will sense billions of features and properties of interest, and things will be represented by web-based, bi-directional services with highly dynamic content and real-time data. This is the new era of the Internet and the Web of Things. 
Since the emergence of such paradigms implies the evolution and integration of the systems with which they interact, it is essential to develop abstract models for representing and simulating the Web of Things in order to establish new approaches. This article describes a Web of Things model based on a structured XML representation. We also present a simulator whose ultimate goal is to encapsulate the expected dynamics of the Web of Things for the future development of information retrieval (IR) systems. The simulator generates a real-time collection of XML documents containing spatio-temporal contexts and textual and sensed information of highly dynamic dimensions. The simulator is characterized by its flexibility and versatility for representing real-world scenarios and offers a unique perspective for information retrieval. In this article, we evaluate and test the simulator in terms of its performance variables for computing resource consumption and present our experimentation with the simulator on three real scenarios by considering the generation variables for the IR document collection.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Davison:2018:LTR, author = "Brian D. Davison", title = "List of 2016 {TWEB} Reviewers", journal = j-TWEB, volume = "12", number = "1", pages = "7:1--7:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3180440", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:00 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Davison:2018:E, author = "Brian D. 
Davison", title = "Editorial", journal = j-TWEB, volume = "12", number = "2", pages = "8:1--8:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3232925", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8e", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Jia:2018:KGE, author = "Yantao Jia and Yuanzhuo Wang and Xiaolong Jin and Hailun Lin and Xueqi Cheng", title = "Knowledge Graph Embedding: a Locally and Temporally Adaptive Translation-Based Approach", journal = j-TWEB, volume = "12", number = "2", pages = "8:1--8:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3132733", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A knowledge graph is a graph with entities of different types as nodes and various relations among them as edges. The construction of knowledge graphs in the past decades facilitates many applications, such as link prediction, web search analysis, question answering, and so on. Knowledge graph embedding aims to represent entities and relations in a large-scale knowledge graph as elements in a continuous vector space. Existing methods, for example, TransE, TransH, and TransR, learn the embedding representation by defining a global margin-based loss function over the data. However, the loss function is determined during experiments whose parameters are examined among a closed set of candidates. Moreover, embeddings over two knowledge graphs with different entities and relations share the same set of candidates, ignoring the locality of both graphs. 
This leads to the limited performance of embedding related applications. In this article, a locally adaptive translation method for knowledge graph embedding, called TransA, is proposed to find the loss function by adaptively determining its margin over different knowledge graphs. Then the convergence of TransA is verified from the aspect of its uniform stability. To make the embedding methods up-to-date when new vertices and edges are added into the knowledge graph, the incremental algorithm for TransA, called iTransA, is proposed by adaptively adjusting the optimal margin over time. Experiments on four benchmark data sets demonstrate the superiority of the proposed method, as compared to the state-of-the-art ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Park:2018:WSD, author = "Souneil Park and Aleksandar Matic and Kamini Garg and Nuria Oliver", title = "When Simpler Data Does Not Imply Less Information: a Study of User Profiling Scenarios With Constrained View of Mobile {HTTP(S)} Traffic", journal = j-TWEB, volume = "12", number = "2", pages = "9:1--9:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3143402", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The exponential growth in smartphone adoption is contributing to the availability of vast amounts of human behavioral data. This data enables the development of increasingly accurate data-driven user models that facilitate the delivery of personalized services that are often free in exchange for the use of its customers' data. 
Although such usage conventions have raised many privacy concerns, the increasing value of personal data is motivating diverse entities to aggressively collect and exploit the data. In this article, we unfold profiling scenarios around mobile HTTP(S) traffic, focusing on those that have limited but meaningful segments of the data. The capability of the scenarios to profile personal information is examined with real user data, collected in the wild from 61 mobile phone users for a minimum of 30 days. Our study attempts to model heterogeneous user traits and interests, including personality, boredom proneness, demographics, and shopping interests. Based on our modeling results, we discuss various implications to personalization, privacy, and personal data rights.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Calzavara:2018:SBA, author = "Stefano Calzavara and Alvise Rabitti and Michele Bugliesi", title = "Semantics-Based Analysis of Content Security Policy Deployment", journal = j-TWEB, volume = "12", number = "2", pages = "10:1--10:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3149408", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Content Security Policy (CSP) is a recent W3C standard introduced to prevent and mitigate the impact of content injection vulnerabilities on websites. In this article, we introduce a formal semantics for the latest stable version of the standard, CSP Level 2. We then perform a systematic, large-scale analysis of the effectiveness of the current CSP deployment, using the formal semantics to substantiate our methodology and to assess the impact of the detected issues. 
We focus on four key aspects that affect the effectiveness of CSP: browser support, website adoption, correct configuration, and constant maintenance. Our analysis shows that browser support for CSP is largely satisfactory, with the exception of a few notable issues. However, there are several shortcomings relative to the other three aspects. CSP appears to have a rather limited deployment as yet and, more crucially, existing policies exhibit a number of weaknesses and misconfiguration errors. Moreover, content security policies are not regularly updated to ban insecure practices and remove unintended security violations. We argue that many of these problems can be fixed by better exploiting the monitoring facilities of CSP, while other issues deserve additional research, being more rooted into the CSP design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cacheda:2018:CPU, author = "Fidel Cacheda and Roi Blanco and Nicola Barbieri", title = "Characterizing and Predicting Users' Behavior on Local Search Queries", journal = j-TWEB, volume = "12", number = "2", pages = "11:1--11:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157059", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The use of queries to find products and services that are located nearby is increasing rapidly due mainly to the ubiquity of internet access and location services provided by smartphone devices. Local search engines help users by matching queries with a predefined geographical connotation (``local queries'') against a database of local business listings. 
Local search differs from traditional Web search because, to correctly capture users' click behavior, the estimation of relevance between query and candidate results must be integrated with geographical signals, such as distance. The intuition is that users prefer businesses that are physically closer to them or in a convenient area (e.g., close to their home). However, this notion of closeness depends upon other factors, like the business category, the quality of the service provided, the density of businesses in the area of interest, the hour of the day, or even the day of the week. In this work, we perform an extensive analysis of online users' interactions with a local search engine, investigating their intent, temporal patterns, and highlighting relationships between distance-to-business and other factors, such as business reputation. Furthermore, we investigate the problem of estimating the click-through rate on local search ( LCTR ) by exploiting the combination of standard retrieval methods with a rich collection of geo-, user-, and business-dependent features. We validate our approach on a large log collected from a real-world local search service. Our evaluation shows that the non-linear combination of business and user information, geo-local and textual relevance features leads to significant improvements over existing alternative approaches based on a combination of relevance, distance, and business reputation [1].", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Boldi:2018:BMC, author = "Paolo Boldi and Andrea Marino and Massimo Santini and Sebastiano Vigna", title = "{BUbiNG}: Massive Crawling for the Masses", journal = j-TWEB, volume = "12", number = "2", pages = "12:1--12:26", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3160017", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/pagerank.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/citation.cfm?doid=3176641.3160017", abstract = "Although web crawlers have been around for twenty years by now, there is virtually no freely available, open-source crawling software that guarantees high throughput, overcomes the limits of single-machine systems, and, at the same time, scales linearly with the amount of resources available. This article aims at filling this gap, through the description of BUbiNG, our next-generation web crawler built upon the authors' experience with UbiCrawler [9] and on the last ten years of research on the topic. BUbiNG is an open-source Java fully distributed crawler; a single BUbiNG agent, using sizeable hardware, can crawl several thousand pages per second respecting strict politeness constraints, both host- and IP-based. Unlike existing open-source distributed crawlers that rely on batch techniques (like MapReduce), BUbiNG job distribution is based on modern high-speed protocols to achieve very high throughput.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", keywords = "BUbiNG; centrality measures; distributed systems; Java; PageRank; UbiCrawler; Web crawling", } @Article{Gaeta:2018:MID, author = "Rossano Gaeta", title = "A Model of Information Diffusion in Interconnected Online Social Networks", journal = j-TWEB, volume = "12", number = "2", pages = "13:1--13:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3160000", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Online social networks (OSN) have today reached a remarkable capillary diffusion. There are numerous examples of very large platforms people use to communicate and maintain relationships. People also subscribe to several OSNs, e.g., people create accounts on Facebook, Twitter, and so on. This phenomenon leads to online social internetworking (OSI) scenarios where users who subscribe to multiple OSNs are termed as bridges. Unfortunately, several important features make the study of information propagation in an OSI scenario a difficult task, e.g., correlations in both the structural characteristics of OSNs and the bridge interconnections among them, heterogeneity and size of OSNs, activity factors, cross-posting propensity, and so on. In this article, we propose a directed random graph-based model that is amenable to efficient numerical solution to analyze the phenomenon of information propagation in an OSI scenario; in the model development, we take into account heterogeneity and correlations introduced by both topological (correlations among nodes degrees and among bridge distributions) and user-related factors (activity index, cross-posting propensity). 
We first validate the model predictions against simulations on snapshots of interconnected OSNs in a reference scenario. Subsequently, we exploit the model to show the impact on the information propagation of several characteristics of the reference scenario, i.e., size and complexity of the OSI scenario, degree distribution and overall number of bridges, growth and decline of OSNs in time, and time-varying cross-posting users propensity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Davison:2018:TR, author = "Brian D. Davison", title = "2017 {TWEB} Reviewers", journal = j-TWEB, volume = "12", number = "2", pages = "14:1--14:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3209033", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jun 28 14:10:01 MDT 2018", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fogli:2018:EQU, author = "Daniela Fogli and Giovanni Guida", title = "Evaluating Quality in Use of Corporate {Web} Sites: an Empirical Investigation", journal = j-TWEB, volume = "12", number = "3", pages = "15:1--15:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3184646", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In our prior work, we presented a novel approach to the evaluation of quality in use of corporate web sites based on an original quality model (QM-U) and a related methodology (EQ-EVAL). This article focuses on two research questions. 
The first one aims at investigating whether expected quality obtained through the application of EQ-EVAL methodology by employing a small panel of evaluators is a good approximation of actual quality obtained through experimentation with real users. To answer this research question, a comparative study has been carried out involving 5 evaluators and 50 real users. The second research question aims at demonstrating that the adoption of the EQ-EVAL methodology can provide useful information for web site improvement. Three original indicators, namely coherence, coverage and ranking have been defined to answer this question, and an additional study comparing the assessments of two panels of 5 and 10 evaluators, respectively, has been carried out. The results obtained in both studies are largely positive and provide a rational support for the adoption of the EQ-EVAL methodology.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Park:2018:LLB, author = "Sangkeun Park and Mark S. Ackerman and Uichin Lee", title = "Localness of Location-based Knowledge Sharing: a Study of {Naver KiN} {``Here''}", journal = j-TWEB, volume = "12", number = "3", pages = "16:1--16:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/2983645", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "In location-based social Q\&A services, people ask a question with a high expectation that local residents who have local knowledge will answer the question. However, little is known about the locality of user activities in location-based social Q\&A services. 
This study aims to deepen our understanding of location-based knowledge sharing by investigating the following: general behavioral characteristics of users, the topical and typological patterns related to geographic characteristics, geographic locality of user activities, and motivations of local knowledge sharing. To this end, we analyzed a 12-month period Q\&A dataset from Naver KiN ``Here,'' a location-based social Q\&A mobile app, in addition to a supplementary survey dataset obtained from 285 mobile users. Our results reveal several unique characteristics of location-based social Q\&A. When compared with conventional social Q\&A sites, users ask and answer different topical/typological questions. In addition, those who answer have a strong spatial locality wherein they primarily have local knowledge in a few regions, in areas such as their home and work. We also find unique motivators such as ownership of local knowledge and a sense of local community. The findings reported in the article have significant implications for the design of Q\&A systems, especially location-based social Q\&A systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Rudra:2018:ESS, author = "Koustav Rudra and Niloy Ganguly and Pawan Goyal and Saptarshi Ghosh", title = "Extracting and Summarizing Situational Information from the {Twitter} Social Media during Disasters", journal = j-TWEB, volume = "12", number = "3", pages = "17:1--17:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178541", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Microblogging sites like Twitter have become important sources of real-time information during disaster events. 
A large amount of valuable situational information is posted in these sites during disasters; however, the information is dispersed among hundreds of thousands of tweets containing sentiments and opinions of the masses. To effectively utilize microblogging sites during disaster events, it is necessary to not only extract the situational information from the large amounts of sentiments and opinions, but also to summarize the large amounts of situational information posted in real-time. During disasters in countries like India, a sizable number of tweets are posted in local resource-poor languages besides the normal English-language tweets. For instance, in the Indian subcontinent, a large number of tweets are posted in Hindi/Devanagari (the national language of India), and some of the information contained in such non-English tweets is not available (or available at a later point of time) through English tweets. In this work, we develop a novel classification-summarization framework which handles tweets in both English and Hindi-we first extract tweets containing situational information, and then summarize this information. Our proposed methodology is developed based on the understanding of how several concepts evolve in Twitter during disaster. This understanding helps us achieve superior performance compared to the state-of-the-art tweet classifiers and summarization approaches on English tweets. Additionally, to our knowledge, this is the first attempt to extract situational information from non-English tweets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Darari:2018:CMR, author = "Fariz Darari and Werner Nutt and Giuseppe Pirr{\`o} and Simon Razniewski", title = "Completeness Management for {RDF} Data Sources", journal = j-TWEB, volume = "12", number = "3", pages = "18:1--18:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3196248", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The Semantic Web is commonly interpreted under the open-world assumption, meaning that information available (e.g., in a data source) captures only a subset of the reality. Therefore, there is no certainty about whether the available information provides a complete representation of the reality. The broad aim of this article is to contribute a formal study of how to describe the completeness of parts of the Semantic Web stored in RDF data sources. We introduce a theoretical framework allowing augmentation of RDF data sources with statements, also expressed in RDF, about their completeness. One immediate benefit of this framework is that now query answers can be complemented with information about their completeness. We study the impact of completeness statements on the complexity of query answering by considering different fragments of the SPARQL language, including the RDFS entailment regime, and the federated scenario. We implement an efficient method for reasoning about query completeness and provide an experimental evaluation in the presence of large sets of completeness statements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wang:2018:OWP, author = "Yue Wang and Dawei Yin and Luo Jie and Pengyuan Wang and Makoto Yamada and Yi Chang and Qiaozhu Mei", title = "Optimizing Whole-Page Presentation for {Web} Search", journal = j-TWEB, volume = "12", number = "3", pages = "19:1--19:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3204461", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Modern search engines aggregate results from different verticals: webpages, news, images, video, shopping, knowledge cards, local maps, and so on. Unlike ``ten blue links,'' these search results are heterogeneous in nature and not even arranged in a list on the page. This revolution directly challenges the conventional ``ranked list'' formulation in ad hoc search. Therefore, finding proper presentation for a gallery of heterogeneous results is critical for modern search engines. We propose a novel framework that learns the optimal page presentation to render heterogeneous results onto search result page (SERP). Page presentation is broadly defined as the strategy to present a set of items on SERP, much more expressive than a ranked list. It can specify item positions, image sizes, text fonts, and any other styles as long as variations are within business and design constraints. The learned presentation is content aware, i.e., tailored to specific queries and returned results. Simulation experiments show that the framework automatically learns eye-catchy presentations for relevant results. Experiments on real data show that simple instantiations of the framework already outperform leading algorithm in federated search result presentation. 
It means the framework can learn its own result presentation strategy purely from data, without even knowing the ``probability ranking principle.''", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Mula:2018:FBE, author = "Wojciech Mu{\l}a and Daniel Lemire", title = "Faster {Base64} Encoding and Decoding Using {AVX2} Instructions", journal = j-TWEB, volume = "12", number = "3", pages = "20:1--20:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3132709", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Web developers use base64 formats to include images, fonts, sounds, and other resources directly inside HTML, JavaScript, JSON, and XML files. We estimate that billions of base64 messages are decoded every day. We are motivated to improve the efficiency of base64 encoding and decoding. Compared to state-of-the-art implementations, we multiply the speeds of both the encoding ($ \approx 10 \times $) and the decoding ($ \approx 7 \times $). We achieve these good results by using the single-instruction-multiple-data instructions available on recent Intel processors (AVX2). Our accelerated software abides by the specification and reports errors when encountering characters outside of the base64 set. It is available online as free software under a liberal license.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Parisi:2018:TKU, author = "Francesco Parisi and Noseong Park and Andrea Pugliese and V. S.
Subrahmanian", title = "Top-k User-Defined Vertex Scoring Queries in Edge-Labeled Graph Databases", journal = j-TWEB, volume = "12", number = "4", pages = "21:1--21:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3213891", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We consider identifying highly ranked vertices in large graph databases such as social networks or the Semantic Web where there are edge labels. There are many applications where users express scoring queries against such databases that involve two elements: (i) a set of patterns describing relationships that a vertex of interest to the user must satisfy and (ii) a scoring mechanism in which the user may use properties of the vertex to assign a score to that vertex. We define the concept of a partial pattern map query (partial PM-query), which intuitively allows us to prune partial matchings, and show that finding an optimal partial PM-query is NP-hard. We then propose two algorithms, PScore_LP and PScore_NWST, to find the answer to a scoring (top- k ) query. In PScore_LP, the optimal partial PM-query is found using a list-oriented pruning method. PScore_NWST leverages node-weighted Steiner trees to quickly compute slightly sub-optimal solutions. We conduct detailed experiments comparing our algorithms with (i) an algorithm (PScore_Base) that computes all answers to the query, evaluates them according to the scoring method, and chooses the top- k, and (ii) two Semantic Web query processing systems (Jena and GraphDB). Our algorithms show better performance than PScore_Base and the Semantic Web query processing systems-moreover, PScore_NWST outperforms PScore_LP on large queries and on queries with a tree structure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Fanou:2018:EAA, author = "Rod{\'e}rick Fanou and Gareth Tyson and Eder Leao Fernandes and Pierre Francois and Francisco Valera and Arjuna Sathiaseelan", title = "Exploring and Analysing the {African} {Web} Ecosystem", journal = j-TWEB, volume = "12", number = "4", pages = "22:1--22:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3213897", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "It is well known that internet infrastructure deployment is progressing at a rapid pace in the African continent. A flurry of recent research has quantified this, highlighting the expansion of its underlying connectivity network. However, improving the infrastructure is not useful without appropriately provisioned services to exploit it. This article measures the availability and utilisation of web infrastructure in Africa. Whereas others have explored web infrastructure in developed regions, we shed light on practices in developing regions. To achieve this, we apply a comprehensive measurement methodology to collect data from a variety of sources. We first focus on Google to reveal that its content infrastructure in Africa is, indeed, expanding. That said, we find that much of its web content is still served from the US and Europe, despite being the most popular website in many African countries. We repeat the same analysis across a number of other regionally popular websites to find that even top African websites prefer to host their content abroad. To explore the reasons for this, we evaluate some of the major bottlenecks facing content delivery networks (CDNs) in Africa. 
Amongst other factors, we find a lack of peering between the networks hosting our probes, preventing the sharing of CDN servers, as well as poorly configured DNS resolvers. Finally, our mapping of middleboxes in the region reveals that there is a greater presence of transparent proxies in Africa than in Europe or the US. We conclude the work with a number of suggestions for alleviating the issues observed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Torre-Bastida:2018:RBT, author = "Ana I. Torre-Bastida and Jes{\'u}s Berm{\'u}dez and Arantza Illarramendi", title = "A Rule-Based Transducer for Querying Incompletely Aligned Datasets", journal = j-TWEB, volume = "12", number = "4", pages = "23:1--23:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3228328", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "A growing number of Linked Open Data sources (from diverse provenances and about different domains) that can be freely browsed and searched to find and extract useful information have been made available. However, access to them is difficult for different reasons. This study addresses access issues concerning heterogeneity. It is common for datasets to describe the same or overlapping domains while using different vocabularies. Our study presents a transducer that transforms a SPARQL query suitably expressed in terms of the vocabularies used in a source dataset into another SPARQL query suitably expressed for a target dataset involving different vocabularies. The transformation is based on existing alignments between terms in different datasets. 
Whenever the transducer is unable to produce a semantically equivalent query because of the scarcity of term alignments, the transducer produces a semantic approximation of the query to avoid returning the empty answer to the user. Transformation across datasets is achieved through the management of a wide range of transformation rules. The feasibility of our proposal has been validated with a prototype implementation that processes queries that appear in well-known benchmarks and SPARQL endpoint logs. Results of the experiments show that the system is quite effective in achieving adequate transformations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vassio:2018:YWY, author = "Luca Vassio and Idilio Drago and Marco Mellia and Zied {Ben Houidi} and Mohamed Lamine Lamali", title = "You, the {Web}, and Your Device: Longitudinal Characterization of Browsing Habits", journal = j-TWEB, volume = "12", number = "4", pages = "24:1--24:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3231466", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Understanding how people interact with the web is key for a variety of applications, e.g., from the design of effective web pages to the definition of successful online marketing campaigns. Browsing behavior has been traditionally represented and studied by means of clickstreams, i.e., graphs whose vertices are web pages, and edges are the paths followed by users. Obtaining large and representative data to extract clickstreams is, however, challenging. The evolution of the web questions whether browsing behavior is changing and, by consequence, whether properties of clickstreams are changing. 
This article presents a longitudinal study of clickstreams from 2013 to 2016. We evaluate an anonymized dataset of HTTP traces captured in a large ISP, where thousands of households are connected. We first propose a methodology to identify actual URLs requested by users from the massive set of requests automatically fired by browsers when rendering web pages. Then, we characterize web usage patterns and clickstreams, taking into account both the temporal evolution and the impact of the device used to explore the web. Our analyses precisely quantify various aspects of clickstreams and uncover interesting patterns, such as the typical short paths followed by people while navigating the web, the fast increasing trend in browsing from mobile devices, and the different roles of search engines and social networks in promoting content. Finally, we contribute a dataset of anonymized clickstreams to the community to foster new studies.$^{1}$", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gong:2018:UCS, author = "Qingyuan Gong and Yang Chen and Jiyao Hu and Qiang Cao and Pan Hui and Xin Wang", title = "Understanding Cross-Site Linking in Online Social Networks", journal = j-TWEB, volume = "12", number = "4", pages = "25:1--25:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3213898", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "As a result of the blooming of online social networks (OSNs), a user often holds accounts on multiple sites. In this article, we study the emerging ``cross-site linking'' function available on mainstream OSN services including Foursquare, Quora, and Pinterest.
We first conduct a data-driven analysis on crawled profiles and social connections of all 61.39 million Foursquare users to obtain a thorough understanding of this function. Our analysis has shown that the cross-site linking function is adopted by 57.10\% of all Foursquare users, and the users who have enabled this function are more active than others. We also find that the enablement of cross-site linking might lead to privacy risks. Based on cross-site links between Foursquare and external OSN sites, we formulate cross-site information aggregation as a problem that uses cross-site links to stitch together site-local information fields for OSN users. Using large datasets collected from Foursquare, Facebook, and Twitter, we demonstrate the usefulness and the challenges of cross-site information aggregation. In addition to the measurements, we carry out a survey collecting detailed user feedback on cross-site linking. This survey studies why people choose to or not to enable cross-site linking, as well as the motivation and concerns of enabling this function.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "25", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cui:2018:UDR, author = "Yi Cui and Clint Sparkman and Hsin-Tsang Lee and Dmitri Loguinov", title = "Unsupervised Domain Ranking in Large-Scale {Web} Crawls", journal = j-TWEB, volume = "12", number = "4", pages = "26:1--26:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182180", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/pagerank.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "With the proliferation of web spam and infinite autogenerated web content, large-scale web crawlers require low-complexity ranking methods to effectively budget their limited resources and allocate bandwidth to reputable sites. In this work, we assume crawls that produce frontiers orders of magnitude larger than RAM, where sorting of pending URLs is infeasible in real time. Under these constraints, the main objective is to quickly compute domain budgets and decide which of them can be massively crawled. Those ranked at the top of the list receive aggressive crawling allowances, while all other domains are visited at some small default rate. To shed light on Internet-wide spam avoidance, we study topology-based ranking algorithms on domain-level graphs from the two largest academic crawls: a 6.3B-page IRLbot dataset and a 1B-page ClueWeb09 exploration. We first propose a new methodology for comparing the various rankings and then show that in-degree BFS-based techniques decisively outperform classic PageRank-style methods, including TrustRank. 
However, since BFS requires several orders of magnitude higher overhead and is generally infeasible for real-time use, we propose a fast, accurate, and scalable estimation method called TSE that can achieve much better crawl prioritization in practice. It is especially beneficial in applications with limited hardware resources.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "26", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{An:2018:IPR, author = "J. An and H. Kwak and S. Jung and J. Salminen and M. Admad and B. Jansen", title = "Imaginary People Representing Real Numbers: Generating Personas from Online Social Media Data", journal = j-TWEB, volume = "12", number = "4", pages = "27:1--27:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3265986", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "We develop a methodology to automate creating imaginary people, referred to as personas, by processing complex behavioral and demographic data of social media audiences. From a popular social media account containing more than 30 million interactions by viewers from 198 countries engaging with more than 4,200 online videos produced by a global media corporation, we demonstrate that our methodology has several novel accomplishments, including: (a) identifying distinct user behavioral segments based on the user content consumption patterns; (b) identifying impactful demographics groupings; and (c) creating rich persona descriptions by automatically adding pertinent attributes, such as names, photos, and personal characteristics. 
We validate our approach by implementing the methodology into an actual working system; we then evaluate it via quantitative methods by examining the accuracy of predicting content preference of personas, the stability of the personas over time, and the generalizability of the method via applying to two other datasets. Research findings show the approach can develop rich personas representing the behavior and demographics of real audiences using privacy-preserving aggregated online social media data from major online platforms. Results have implications for media companies and other organizations distributing content via online platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "27", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Wilson:2019:APP, author = "Shomir Wilson and Florian Schaub and Frederick Liu and Kanthashree Mysore Sathyendra and Daniel Smullen and Sebastian Zimmeck and Rohan Ramanath and Peter Story and Fei Liu and Norman Sadeh and Noah A. Smith", title = "Analyzing Privacy Policies at Scale: From Crowdsourcing to Automated Annotations", journal = j-TWEB, volume = "13", number = "1", pages = "1:1--1:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3230665", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Website privacy policies are often long and difficult to understand. While research shows that Internet users care about their privacy, they do not have the time to understand the policies of every website they visit, and most users hardly ever read privacy policies. 
Some recent efforts have aimed to use a combination of crowdsourcing, machine learning, and natural language processing to interpret privacy policies at scale, thus producing annotations for use in interfaces that inform Internet users of salient policy details. However, little attention has been devoted to studying the accuracy of crowdsourced privacy policy annotations, how crowdworker productivity can be enhanced for such a task, and the levels of granularity that are feasible for automatic analysis of privacy policies. In this article, we present a trajectory of work addressing each of these topics. We include analyses of crowdworker performance, evaluation of a method to make a privacy-policy oriented task easier for crowdworkers, a coarse-grained approach to labeling segments of policy text with descriptive themes, and a fine-grained approach to identifying user choices described in policy text. Together, the results from these efforts show the effectiveness of using automated and semi-automated methods for extracting from privacy policies the data practice details that are salient to Internet users' interests.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Athanasopoulos:2019:MAX, author = "Dionysis Athanasopoulos and Apostolos Zarras", title = "Mining Abstract {XML} Data-Types", journal = j-TWEB, volume = "13", number = "1", pages = "2:1--2:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3267467", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Schema integration has been a long-standing challenge for the data-engineering community that has received steady attention over the past three decades. 
General-purpose integration approaches construct unified schemas that encompass all schema elements. Schema integration has been revisited in the past decade in service-oriented computing since the input/output data-types of service interfaces are heterogeneous XML schemas. However, service integration differs from the traditional integration problem, since it should generalize schemas (mining abstract data-types) instead of unifying all schema elements. To mine well-formed abstract data-types, the fundamental Liskov Substitution Principle (LSP), which generally holds between abstract data-types and their subtypes, should be followed. However, due to the heterogeneity of service data-types, the strict employment of LSP is not usually feasible. On top of that, XML offers a rich type system, based on which data-types are defined via combining type patterns (e.g., composition, aggregation). The existing integration approaches have not dealt with the challenges of a defining subtyping relation between XML type patterns. To address these challenges, we propose a relaxed version of LSP between XML type patterns and an automated generalization process for mining abstract XML data-types. We evaluate the effectiveness and the efficiency of the process on the schemas of two datasets against two representative state-of-the-art approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Anisetti:2019:TBS, author = "Marco Anisetti and Claudio Ardagna and Ernesto Damiani and Gianluca Polegri", title = "Test-Based Security Certification of Composite Services", journal = j-TWEB, volume = "13", number = "1", pages = "3:1--3:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3267468", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "The diffusion of service-based and cloud-based systems has created a scenario where software is often made available as services, offered as commodities over corporate networks or the global net. This scenario supports the definition of business processes as composite services, which are implemented via either static or runtime composition of offerings provided by different suppliers. Fast and accurate evaluation of services' security properties becomes then a fundamental requirement and is nowadays part of the software development process. In this article, we show how the verification of security properties of composite services can be handled by test-based security certification and built to be effective and efficient in dynamic composition scenarios. Our approach builds on existing security certification schemes for monolithic services and extends them towards service compositions. It virtually certifies composite services, starting from certificates awarded to the component services. We describe three heuristic algorithms for generating runtime test-based evidence of the composite service holding the properties. These algorithms are compared with the corresponding exhaustive algorithm to evaluate their quality and performance. 
We also evaluate the proposed approach in a real-world industrial scenario, which considers ENGpay online payment system of Engineering Ingegneria Informatica S.p.A. The proposed industrial evaluation presents the utility and generality of the proposed approach by showing how certification results can be used as a basis to establish compliance to Payment Card Industry Data Security Standard.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Redmiles:2019:NPW, author = "Elissa M. Redmiles and Eszter Hargittai", title = "New Phone, Who Dis? {Modeling} Millennials' Backup Behavior", journal = j-TWEB, volume = "13", number = "1", pages = "4:1--4:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3208105", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Given the ever-rising frequency of malware attacks and other problems leading people to lose their files, backups are an important proactive protective behavior in which users can engage. Backing up files can prevent emotional and financial losses and improve overall user experience. Yet, we find that less than half of young adults perform mobile or computer backups regularly. To understand why, we model the factors that drive mobile and computer backup behavior, and changes in that behavior over time, using data from a panel survey of 384 diverse young adults. We develop a set of models that explain 37\% and 38\% of the variance in reported mobile and computer backup behaviors, respectively. These models show consistent relationships between Internet skills and backup frequency on both mobile and computer devices. 
We find that this relationship holds longitudinally: increases in Internet skills lead to increased frequency of computer backups. This article provides a foundation for understanding what drives young adults' backup behavior. It concludes with recommendations for motivating people to back up, and for future work, modeling similar user behaviors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Calegari:2019:WPH, author = "Patrice Calegari and Marc Levrier and Pawe{\l} Balczy{\'n}ski", title = "{Web} Portals for High-performance Computing: a Survey", journal = j-TWEB, volume = "13", number = "1", pages = "5:1--5:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3197385", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/java2010.bib; http://www.math.utah.edu/pub/tex/bib/super.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "This article addresses web interfaces for High-performance Computing (HPC) simulation software. First, it presents a brief history, starting in the 1990s with Java applets, of web interfaces used for accessing and making best possible use of remote HPC resources. It introduces HPC web-based portal use cases. Then it identifies and discusses the key features, among functional and non-functional requirements, that characterize such portals. A brief state of the art is then presented. 
The design and development of Bull extreme factory Computing Studio v3 (XCS3) is chosen as a common thread for showing how the identified key features can all be implemented in one software: multi-tenancy, multi-scheduler compatibility, complete control through an HTTP RESTful API, customizable user interface with Responsive Web Design, HPC application template framework, remote visualization, and access through the Authentication, Authorization, and Accounting security framework with the Role-Based Access Control permission model. Non-functional requirements (security, usability, performance, reliability) are discussed, and the article concludes by giving perspective for future work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Romero:2019:SNU, author = "Daniel M. Romero and Brian Uzzi and Jon Kleinberg", title = "Social Networks under Stress: Specialized Team Roles and Their Communication Structure", journal = j-TWEB, volume = "13", number = "1", pages = "6:1--6:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295460", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Social network research has begun to take advantage of fine-grained communications regarding coordination, decision-making, and knowledge sharing. These studies, however, have not generally analyzed how external events are associated with a social network's structure and communicative properties. Here, we study how external events are associated with a network's change in structure and communications. 
Analyzing a complete dataset of millions of instant messages among the decision-makers with different roles in a large hedge fund and their network of outside contacts, we investigate the link between price shocks, network structure, and change in the affect and cognition of decision-makers embedded in the network. We also analyze the communication dynamics among specialized teams in the organization. When price shocks occur the communication network tends not to display structural changes associated with adaptiveness such as the activation of weak ties to obtain novel information. Rather, the network ``turtles up.'' It displays a propensity for higher clustering, strong tie interaction, and an intensification of insider vs. outsider and within-role vs. between-role communication. Further, we find changes in network structure predict shifts in cognitive and affective processes, execution of new transactions, and local optimality of transactions better than prices, revealing the important predictive relationship between network structure and collective behavior within a social network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Gilani:2019:LSB, author = "Zafar Gilani and Reza Farahbakhsh and Gareth Tyson and Jon Crowcroft", title = "A Large-scale Behavioural Analysis of Bots and Humans on {Twitter}", journal = j-TWEB, volume = "13", number = "1", pages = "7:1--7:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3298789", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:06 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Recent research has shown a substantial active presence of bots in online social networks (OSNs). 
In this article, we perform a comparative analysis of the usage and impact of bots and humans on Twitter-one of the largest OSNs in the world. We collect a large-scale Twitter dataset and define various metrics based on tweet metadata. Using a human annotation task, we assign ``bot'' and ``human'' ground-truth labels to the dataset and compare the annotations against an online bot detection tool for evaluation. We then ask a series of questions to discern important behavioural characteristics of bots and humans using metrics within and among four popularity groups. From the comparative analysis, we draw clear differences and interesting similarities between the two entities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Thoma:2019:FEC, author = "Steffen Thoma and Andreas Thalhammer and Andreas Harth and Rudi Studer", title = "{FusE}: Entity-Centric Data Fusion on Linked Data", journal = j-TWEB, volume = "13", number = "2", pages = "8:1--8:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3306128", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3306128", abstract = "Many current web pages include structured data which can directly be processed and used. Search engines, in particular, gather that structured data and provide question answering capabilities over the integrated data with an entity-centric presentation of the results. Due to the decentralized nature of the web, multiple structured data sources can provide similar information about an entity. But data from different sources may involve different vocabularies and modeling granularities, which makes integration difficult. 
We present FusE, an approach that identifies similar entity-specific data across sources, independent of the vocabulary and data modeling choices. We apply our method along the scenario of a trustable knowledge panel, conduct experiments in which we identify and process entity data from web sources, and compare the output to a competing system. The results underline the advantages of the presented entity-centric data fusion approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alarte:2019:WWT, author = "Juli{\'a}n Alarte and Josep Silva and Salvador Tamarit", title = "What {Web} Template Extractor Should {I} Use? {A} Benchmarking and Comparison for Five Template Extractors", journal = j-TWEB, volume = "13", number = "2", pages = "9:1--9:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3316810", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3316810", abstract = "A Web template is a resource that implements the structure and format of a website, making it ready for plugging content into already formatted and prepared pages. For this reason, templates are one of the main development resources for website engineers, because they increase productivity. Templates are also useful for the final user, because they provide uniformity and a common look and feel for all webpages. However, from the point of view of crawlers and indexers, templates are an important problem, because templates usually contain irrelevant information, such as advertisements, menus, and banners. Processing and storing this information leads to a waste of resources (storage space, bandwidth, etc.). 
It has been measured that templates represent between 40\% and 50\% of data on the Web. Therefore, identifying templates is essential for indexing tasks. There exist many techniques and tools for template extraction, but, unfortunately, it is not clear at all which template extractor should a user/system use, because they have never been compared, and because they present different (complementary) features such as precision, recall, and efficiency. In this work, we compare the most advanced template extractors. We implemented and evaluated five of the most advanced template extractors in the literature. To compare all of them, we implemented a workbench, where they have been integrated and evaluated. Thanks to this workbench, we can provide a fair empirical comparison of all methods using the same benchmarks, technology, implementation language, and evaluation criteria.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Vicario:2019:PFN, author = "Michela {Del Vicario} and Walter Quattrociocchi and Antonio Scala and Fabiana Zollo", title = "Polarization and Fake News: Early Warning of Potential Misinformation Targets", journal = j-TWEB, volume = "13", number = "2", pages = "10:1--10:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3316809", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3316809", abstract = "Users' polarization and confirmation bias play a key role in misinformation spreading on online social media. Our aim is to use this information to determine in advance potential targets for hoaxes and fake news. 
In this article, we introduce a framework for promptly identifying polarizing content on social media and, thus, ``predicting'' future fake news topics. We validate the performances of the proposed methodology on a massive Italian Facebook dataset, showing that we are able to identify topics that are susceptible to misinformation with 77\% accuracy. Moreover, such information may be embedded as a new feature in an additional classifier able to recognize fake news with 91\% accuracy. The novelty of our approach consists in taking into account a series of characteristics related to users' behavior on online social media such as Facebook, making a first, important step towards the mitigation of misinformation phenomena by supporting the identification of potential misinformation targets and thus the design of tailored counter-narratives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Cresci:2019:CPU, author = "Stefano Cresci and Fabrizio Lillo and Daniele Regoli and Serena Tardelli and Maurizio Tesconi", title = "Cashtag Piggybacking: Uncovering Spam and Bot Activity in Stock Microblogs on {Twitter}", journal = j-TWEB, volume = "13", number = "2", pages = "11:1--11:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3313184", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3313184", abstract = "Microblogs are increasingly exploited for predicting prices and traded volumes of stocks in financial markets. However, it has been demonstrated that much of the content shared in microblogging platforms is created and publicized by bots and spammers. 
Yet, the presence (or lack thereof) and the impact of fake stock microblogs have never been systematically investigated before. Here, we study 9M tweets related to stocks of the five main financial markets in the US. By comparing tweets with financial data from Google Finance, we highlight important characteristics of Twitter stock microblogs. More importantly, we uncover a malicious practice---referred to as cashtag piggybacking---perpetrated by coordinated groups of bots and likely aimed at promoting low-value stocks by exploiting the popularity of high-value ones. Among the findings of our study is that as much as 71\% of the authors of suspicious financial tweets are classified as bots by a state-of-the-art spambot-detection algorithm. Furthermore, 37\% of them were suspended by Twitter a few months after our investigation. Our results call for the adoption of spam- and bot-detection techniques in all studies and applications that exploit user-generated content for predicting the stock market.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Watanabe:2019:LCP, author = "Willian Massami Watanabe and Giovana L{\'a}zaro Am{\^e}ndola and Fagner Christian Paes", title = "Layout Cross-Platform and Cross-Browser Incompatibilities Detection using Classification of {DOM} Elements", journal = j-TWEB, volume = "13", number = "2", pages = "12:1--12:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3316808", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3316808", abstract = "Web applications can be accessed through a variety of user agent configurations, in which the browser, platform, and device capabilities are not under the control of developers. In order to grant the compatibility of a web application in each environment, developers must manually inspect their web application in a wide variety of devices, platforms, and browsers. Web applications can be rendered inconsistently depending on the browser, the platform, and the device capabilities which are used. Furthermore, the devices' different viewport widths impact the way web applications are rendered in them, in which elements can be resized and change their absolute positions in the display. These adaptation strategies must also be considered in automatic incompatibility detection approaches in the state of the art. Hence, we propose a classification approach for detecting Layout Cross-platform and Cross-browser incompatibilities, which considers the adaptation strategies used in responsive web applications. 
Our approach is an extension of previous Cross-browser incompatibility detection approaches and has the goal of reducing the cost associated with manual inspections in different devices, platforms, and browsers, by automatically detecting Layout incompatibilities in this scenario. The proposed approach classifies each DOM element which composes a web application as an incompatibility or not, based on its attributes, position, alignment, screenshot, and the viewport width of the browser. We report the results of an experiment conducted with 42 Responsive Web Applications, rendered in three devices (Apple iPhone SE, Apple iPhone 8 Plus, and Motorola Moto G4) and browsers (Google Chrome and Apple Safari). The results (with F-measure of 0.70) showed evidence which quantify the effectiveness of our classification approach, and it could be further enhanced for detecting Cross-platform and Cross-browser incompatibilities. Furthermore, in the experiment, our approach also performed better when compared to a former state-of-the-art classification technique for Cross-browser incompatibilities detection.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Sigg:2019:EUP, author = "Stephan Sigg and Eemil Lagerspetz and Ella Peltonen and Petteri Nurmi and Sasu Tarkoma", title = "Exploiting Usage to Predict Instantaneous App Popularity: Trend Filters and Retention Rates", journal = j-TWEB, volume = "13", number = "2", pages = "13:1--13:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3199677", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3199677", abstract = "Popularity of mobile apps is traditionally measured by metrics such as the number of downloads, installations, or user ratings. A problem with these measures is that they reflect usage only indirectly. Indeed, retention rates, i.e., the number of days users continue to interact with an installed app, have been suggested to predict successful app lifecycles. We conduct the first independent and large-scale study of retention rates and usage trends on a dataset of app-usage data from a community of 339,842 users and more than 213,667 apps. Our analysis shows that, on average, applications lose 65\% of their users in the first week, while very popular applications (top 100) lose only 35\%. It also reveals, however, that many applications have more complex usage behaviour patterns due to seasonality, marketing, or other factors. To capture such effects, we develop a novel app-usage trend measure which provides instantaneous information about the popularity of an application. Analysis of our data using this trend filter shows that roughly 40\% of all apps never gain more than a handful of users (Marginal apps). 
Less than 0.1\% of the remaining 60\% are constantly popular (Dominant apps), 1\% have a quick drain of usage after an initial steep rise (Expired apps), and 6\% continuously rise in popularity (Hot apps). From these, we can distinguish, for instance, trendsetters from copycat apps. We conclude by demonstrating that usage behaviour trend information can be used to develop better mobile app recommendations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Alorainy:2019:EAU, author = "Wafa Alorainy and Pete Burnap and Han Liu and Matthew L. Williams", title = "{``The Enemy Among Us''}: Detecting Cyber Hate Speech with Threats-based Othering Language Embeddings", journal = j-TWEB, volume = "13", number = "3", pages = "14:1--14:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3324997", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3324997", abstract = "Offensive or antagonistic language targeted at individuals and social groups based on their personal characteristics (also known as cyber hate speech or cyberhate) has been frequently posted and widely circulated via the World Wide Web. This can be considered as a key risk factor for individual and societal tension surrounding regional instability. Automated Web-based cyberhate detection is important for observing and understanding community and regional societal tension---especially in online social networks where posts can be rapidly and widely viewed and disseminated. 
While previous work has involved using lexicons, bags-of-words, or probabilistic language parsing approaches, they often suffer from a similar issue, which is that cyberhate can be subtle and indirect-thus, depending on the occurrence of individual words or phrases, can lead to a significant number of false negatives, providing inaccurate representation of the trends in cyberhate. This problem motivated us to challenge thinking around the representation of subtle language use, such as references to perceived threats from ``the other'' including immigration or job prosperity in a hateful context. We propose a novel ``othering'' feature set that utilizes language use around the concept of ``othering'' and intergroup threat theory to identify these subtleties, and we implement a wide range of classification methods using embedding learning to compute semantic distances between parts of speech considered to be part of an ``othering'' narrative. To validate our approach, we conducted two sets of experiments. The first involved comparing the results of our novel method with state-of-the-art baseline models from the literature. Our approach outperformed all existing methods. The second tested the best performing models from the first phase on unseen datasets for different types of cyberhate, namely religion, disability, race, and sexual orientation. The results showed F-measure scores for classifying hateful instances obtained through applying our model of 0.81, 0.71, 0.89, and 0.72, respectively, demonstrating the ability of the ``othering'' narrative to be an important part of model generalization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Zhao:2019:USE, author = "Liping Zhao and Pericles Loucopoulos and Evangelia Kavakli and Keletso J. 
Letsholo", title = "User Studies on End-User Service Composition: a Literature Review and a Design Framework", journal = j-TWEB, volume = "13", number = "3", pages = "15:1--15:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340294", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340294", abstract = "Context: End-user service composition (EUSC) is a service-oriented paradigm that aims to empower end users and allow them to compose their own web applications from reusable service components. User studies have been used to evaluate EUSC tools and processes. Such an approach should benefit software development, because incorporating end users' feedback into software development should make software more useful and usable. Problem: There is a gap in our understanding of what constitutes a user study and how a good user study should be designed, conducted, and reported. Goal: This article aims to address this gap. Method: The article presents a systematic review of 47 selected user studies for EUSC. Guided by a review framework, the article systematically and consistently assesses the focus, methodology and cohesion of each of these studies. Results: The article concludes that the focus of these studies is clear, but their methodology is incomplete and inadequate, their overall cohesion is poor. The findings lead to the development of a design framework and a set of questions for the design, reporting, and review of good user studies for EUSC. The detailed analysis and the insights obtained from the analysis should be applicable to the design of user studies for service-oriented systems as well and indeed for any user studies related to software artifacts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Chatzakou:2019:DCC, author = "Despoina Chatzakou and Ilias Leontiadis and Jeremy Blackburn and Emiliano {De Cristofaro} and Gianluca Stringhini and Athena Vakali and Nicolas Kourtellis", title = "Detecting Cyberbullying and Cyberaggression in Social Media", journal = j-TWEB, volume = "13", number = "3", pages = "17:1--17:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3343484", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Oct 22 08:10:07 MDT 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3343484", abstract = "Cyberbullying and cyberaggression are increasingly worrisome phenomena affecting people across all demographics. More than half of young social media users worldwide have been exposed to such prolonged and/or coordinated digital harassment. Victims can experience a wide range of emotions, with negative consequences such as embarrassment, depression, isolation from other community members, which embed the risk to lead to even more critical consequences, such as suicide attempts. In this work, we take the first concrete steps to understand the characteristics of abusive behavior in Twitter, one of today's largest social media platforms. We analyze 1.2 million users and 2.1 million tweets, comparing users participating in discussions around seemingly normal topics like the NBA, to those more likely to be hate-related, such as the Gamergate controversy, or the gender pay inequality at the BBC station. We also explore specific manifestations of abusive behavior, i.e., cyberbullying and cyberaggression, in one of the hate-related communities (Gamergate). 
We present a robust methodology to distinguish bullies and aggressors from normal Twitter users by considering text, user, and network-based attributes. Using various state-of-the-art machine-learning algorithms, we classify these accounts with over 90\% accuracy and AUC. Finally, we discuss the current status of Twitter user accounts marked as abusive by our methodology and study the performance of potential mechanisms that can be used by Twitter to suspend users in the future.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Perino:2019:LTM, author = "Diego Perino and Matteo Varvello and Claudio Soriente", title = "Long-term Measurement and Analysis of the Free Proxy Ecosystem", journal = j-TWEB, volume = "13", number = "4", pages = "18:1--18:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3360695", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Dec 21 07:39:03 MST 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3360695", abstract = "Free web proxies promise anonymity and censorship circumvention at no cost. Several websites publish lists of free proxies organized by country, anonymity level, and performance. These lists index hundreds of thousands of hosts discovered via automated tools and crowd-sourcing. A complex free proxy ecosystem has been forming over the years, of which very little is known. In this article, we shed light on this ecosystem via a distributed measurement platform that leverages both active and passive measurements. Active measurements are carried out by an infrastructure we name ProxyTorrent, which discovers free proxies, assesses their performance, and detects potential malicious activities. 
Passive measurements focus on proxy performance and usage in the wild, and are accomplished by means of a Chrome extension named Ciao. ProxyTorrent has been running since January 2017, monitoring up to 230K free proxies. Ciao was launched in March 2017 and has thus far served roughly 9.7K users and generated 14TB of traffic. Our analysis shows that less than 2\% of the proxies announced on the Web indeed proxy traffic on behalf of users; further, only half of these proxies have decent performance and can be used reliably. Every day, around 5\%--10\% of the active proxies exhibit malicious behaviors, e.g., advertisement injection, TLS interception, and cryptojacking, and these proxies are also the ones providing the best performance. Through the analysis of more than 14TB of proxied traffic, we show that web browsing is the primary user activity. Geo-blocking avoidance---allegedly a popular use case for free web proxies---accounts for 30\% or less of the traffic, and it mostly involves countries hosting popular geo-blocked content.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Liu:2019:FPS, author = "Daxin Liu and Gong Cheng and Qingxia Liu and Yuzhong Qu", title = "Fast and Practical Snippet Generation for {RDF} Datasets", journal = j-TWEB, volume = "13", number = "4", pages = "19:1--19:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3365575", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Dec 21 07:39:03 MST 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3365575", abstract = "Triple-structured open data creates value in many ways. However, the reuse of datasets is still challenging. 
Users feel difficult to assess the usefulness of a large dataset containing thousands or millions of triples. To satisfy the needs, existing abstractive methods produce a concise high-level abstraction of data. Complementary to that, we adopt the extractive strategy and aim to select the optimum small subset of data from a dataset as a snippet to compactly illustrate the content of the dataset. This has been formulated as a combinatorial optimization problem in our previous work. In this article, we design a new algorithm for the problem, which is an order of magnitude faster than the previous one but has the same approximation ratio. We also develop an anytime algorithm that can generate empirically better solutions using additional time. To suit datasets that are partially accessible via online query services (e.g., SPARQL endpoints for RDF data), we adapt our algorithms to trade off quality of snippet for feasibility and efficiency in the Web environment. We carry out extensive experiments based on real RDF datasets and SPARQL endpoints for evaluating quality and running time. The results demonstrate the effectiveness and practicality of our proposed algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Manica:2019:CUH, author = "Edimar Manica and Carina Friedrich Dorneles and Renata Galante", title = "Combining {URL} and {HTML} Features for Entity Discovery in the {Web}", journal = j-TWEB, volume = "13", number = "4", pages = "20:1--20:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3365574", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Dec 21 07:39:03 MST 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3365574", abstract = "The web is a large repository of entity-pages. An entity-page is a page that publishes data representing an entity of a particular type, for example, a page that describes a driver on a website about a car racing championship. The attribute values published in the entity-pages can be used for many data-driven companies, such as insurers, retailers, and search engines. In this article, we define a novel method, called SSUP, which discovers the entity-pages on the websites. The novelty of our method is that it combines URL and HTML features in a way that allows the URL terms to have different weights depending on their capacity to distinguish entity-pages from other pages, and thus the efficacy of the entity-page discovery task is increased. SSUP determines the similarity thresholds on each website without human intervention. We carried out experiments on a dataset with different real-world websites and a wide range of entity types. SSUP achieved a 95\% rate of precision and 85\% recall rate. Our method was compared with two state-of-the-art methods and outperformed them with a precision gain between 51\% and 66\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Yu:2019:EPP, author = "Weiren Yu and Julie McCann and Chengyuan Zhang", title = "Efficient Pairwise Penetrating-rank Similarity Retrieval", journal = j-TWEB, volume = "13", number = "4", pages = "21:1--21:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3368616", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Dec 21 07:39:03 MST 2019", bibsource = "http://www.math.utah.edu/pub/tex/bib/pagerank.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", abstract = "Many web applications demand a measure of similarity between two entities, such as collaborative filtering, web document ranking, linkage prediction, and anomaly detection. P-Rank (Penetrating-Rank) has been accepted as a promising graph-based similarity measure, as it provides a comprehensive way of encoding both incoming and outgoing links into assessment. However, the existing method to compute P-Rank is iterative in nature and rather cost-inhibitive. Moreover, the accuracy estimate and stability issues for P-Rank computation have not been addressed. In this article, we consider the optimization techniques for P-Rank search that encompasses its accuracy, stability, and computational efficiency. (1) The accuracy estimation is provided for P-Rank iterations, with the aim to find out the number of iterations, $k$, required to guarantee a desired accuracy. (2) A rigorous bound on the condition number of P-Rank is obtained for stability analysis. Based on this bound, it can be shown that P-Rank is stable and well-conditioned when the damping factors are chosen to be suitably small. 
(3) Two matrix-based algorithms, applicable to digraphs and undirected graphs, are, respectively, devised for efficient P-Rank computation, which improves the computational time from $ O(k n^3) $ to $ O(\upsilon n^2 + \upsilon^6) $ for digraphs, and to $ O(\upsilon n^2) $ for undirected graphs, where $n$ is the number of vertices in the graph, and $ \upsilon (\ll n)$ is the target rank of the graph. Moreover, our proposed algorithms can significantly reduce the memory space of P-Rank computations from $ O(n^2) $ to $ O(\upsilon n + \upsilon^4) $ for digraphs, and to $ O(\upsilon n) $ for undirected graphs, respectively. Finally, extensive experiments on real-world and synthetic datasets demonstrate the usefulness and efficiency of the proposed techniques for P-Rank similarity assessment on various networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1062", } @Article{Eraslan:2020:BBW, author = "Sukru Eraslan and Yeliz Yesilada and Simon Harper", title = "{``The Best of Both Worlds!''}: Integration of {Web} Page and Eye Tracking Data Driven Approaches for Automatic {AOI} Detection", journal = j-TWEB, volume = "14", number = "1", pages = "1:1--1:31", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372497", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Feb 8 06:24:56 MST 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372497", abstract = "Web pages are composed of different kinds of elements (menus, adverts, etc.). Segmenting pages into their elements has long been important in understanding how people experience those pages and in making those experiences {``better.''} Many approaches have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Shah:2020:OMA, author = "Ankit Shah and Rajesh Ganesan and Sushil Jajodia and Hasan Cam", title = "An Outsourcing Model for Alert Analysis in a Cybersecurity Operations Center", journal = j-TWEB, volume = "14", number = "1", pages = "2:1--2:22", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372498", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Feb 8 06:24:56 MST 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372498", abstract = "A typical Cybersecurity Operations Center (CSOC) is a service organization. It hires and trains analysts, whose task is to perform analysis of alerts that were generated while monitoring the client's networks. Due to ever-increasing financial and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Abulaish:2020:SFL, author = "Muhammad Abulaish and Ashraf Kamal and Mohammed J. Zaki", title = "A Survey of Figurative Language and Its Computational Detection in Online Social Networks", journal = j-TWEB, volume = "14", number = "1", pages = "3:1--3:52", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375547", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Feb 8 06:24:56 MST 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375547", abstract = "The frequent usage of figurative language on online social networks, especially on Twitter, has the potential to mislead traditional sentiment analysis and recommender systems. 
Due to the extensive use of slangs, bashes, flames, and non-literal texts, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Hassanpour:2020:IAV, author = "Masoud Hassanpour and Seyed Amir Hoseinitabatabaei and Payam Barnaghi and Rahim Tafazolli", title = "Improving the Accuracy of the Video Popularity Prediction Models through User Grouping and Video Popularity Classification", journal = j-TWEB, volume = "14", number = "1", pages = "4:1--4:28", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372499", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Feb 8 06:24:56 MST 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372499", abstract = "This article proposes a novel approach for enhancing the video popularity prediction models. Using the proposed approach, we enhance three popularity prediction techniques that outperform the accuracy of the prior state-of-the-art solutions. The major \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Li:2020:TAS, author = "Guohui Li and Qi Chen and Bolong Zheng and Nguyen Quoc Viet Hung and Pan Zhou and Guanfeng Liu", title = "Time-aspect-sentiment Recommendation Models Based on Novel Similarity Measure Methods", journal = j-TWEB, volume = "14", number = "2", pages = "5:1--5:26", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375548", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 21 08:25:53 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375548", abstract = "The explosive growth of e-commerce has led to the development of the recommendation system. The recommendation system aims to provide a set of items that meet users' personalized needs through analyzing users' consumption records. However, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Carpineto:2020:ESA, author = "Claudio Carpineto and Giovanni Romano", title = "An Experimental Study of Automatic Detection and Measurement of Counterfeit in Brand Search Results", journal = j-TWEB, volume = "14", number = "2", pages = "6:1--6:35", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378443", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 21 08:25:53 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378443", abstract = "Brand search results are poisoned by fake ecommerce websites that infringe on the trademark rights of legitimate holders. In this article, we study how to tackle and measure this problem automatically. 
We present a pipeline with two machine learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Tonge:2020:IPP, author = "Ashwini Tonge and Cornelia Caragea", title = "Image Privacy Prediction Using Deep Neural Networks", journal = j-TWEB, volume = "14", number = "2", pages = "7:1--7:32", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386082", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 21 08:25:53 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386082", abstract = "Images today are increasingly shared online on social networking sites such as Facebook, Flickr, and Instagram. Image sharing occurs not only within a group of friends but also more and more outside a user's social circles for purposes of social \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Laperdrix:2020:BFS, author = "Pierre Laperdrix and Nataliia Bielova and Benoit Baudry and Gildas Avoine", title = "Browser Fingerprinting: a Survey", journal = j-TWEB, volume = "14", number = "2", pages = "8:1--8:33", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386040", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 21 08:25:53 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386040", abstract = "With this article, we survey the research performed in the domain of browser fingerprinting, while providing an accessible entry point to newcomers in the field. We explain how this technique works and where it stems from. 
We analyze the related work in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Shi:2020:TAW, author = "Min Shi and Yufei Tang and Xingquan Zhu and Jianxun Liu", title = "Topic-aware {Web} Service Representation Learning", journal = j-TWEB, volume = "14", number = "2", pages = "9:1--9:23", month = apr, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386041", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Tue Apr 21 08:25:53 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386041", abstract = "The advent of Service-Oriented Architecture (SOA) has brought a fundamental shift in the way in which distributed applications are implemented. An overwhelming number of Web-based services (e.g., APIs and Mashups) have leveraged this shift and furthered \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Xiao:2020:PRF, author = "Zhijun Xiao and Cuiping Li and Hong Chen", title = "{PatternRank+NN}: a Ranking Framework Bringing User Behaviors into Entity Set Expansion from {Web} Search Queries", journal = j-TWEB, volume = "14", number = "3", pages = "10:1--10:15", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3386042", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 22 17:29:55 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/pagerank.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3386042", abstract = "We propose a ranking framework, called PatternRank+NN, for expanding a set of seed entities of a particular class (i.e., entity set expansion) from Web search queries. PatternRank+NN consists of two parts: PatternRank and NN. Unlike the traditional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wu:2020:STI, author = "Huijun Wu and Chen Wang and Richard Nock and Wei Wang and Jie Yin and Kai Lu and Liming Zhu", title = "{SMINT}: Toward Interpretable and Robust Model Sharing for Deep Neural Networks", journal = j-TWEB, volume = "14", number = "3", pages = "11:1--11:28", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381833", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 22 17:29:55 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381833", abstract = "Sharing a pre-trained machine learning model, particularly a deep neural network via prediction APIs, is becoming a common practice on machine learning as a service (MLaaS) platforms nowadays. Although deep neural networks (DNN) have shown remarkable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chattopadhyay:2020:QAA, author = "Soumi Chattopadhyay and Ansuman Banerjee", title = "{QoS}-aware Automatic {Web} Service Composition with Multiple Objectives", journal = j-TWEB, volume = "14", number = "3", pages = "12:1--12:38", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389147", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 22 17:29:55 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3389147", abstract = "Automatic web service composition has received a significant research attention in service-oriented computing over decades of research. 
With increasing number of web services, providing an end-to-end Quality of Service (QoS) guarantee in responding to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wu:2020:SAR, author = "Zhiang Wu and Changsheng Li and Jie Cao and Yong Ge", title = "On Scalability of Association-rule-based Recommendation: a Unified Distributed-computing Framework", journal = j-TWEB, volume = "14", number = "3", pages = "13:1--13:21", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3398202", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 22 17:29:55 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3398202", abstract = "The association-rule-based approach is one of the most common technologies for building recommender systems and it has been extensively adopted for commercial use. A variety of techniques, mainly including eligible rule selection and multiple rules \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Ma:2020:RTC, author = "Yun Ma and Ziniu Hu and Diandian Gu and Li Zhou and Qiaozhu Mei and Gang Huang and Xuanzhe Liu", title = "Roaming Through the Castle Tunnels: an Empirical Analysis of Inter-app Navigation of {Android} Apps", journal = j-TWEB, volume = "14", number = "3", pages = "14:1--14:24", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3395050", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Jul 22 17:29:55 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3395050", abstract = "Smartphone applications (a.k.a., apps) have become indispensable in our everyday life and work. In practice, accomplishing a task on smartphones may require the user to navigate among various apps. Unlike Web pages that are inherently interconnected \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Jeong:2020:DOW, author = "Hyuk-Jin Jeong and Inchang Jeong and Soo-Mook Moon", title = "Dynamic Offloading of {Web} Application Execution Using Snapshot", journal = j-TWEB, volume = "14", number = "4", pages = "15:1--15:24", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3402124", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Sep 5 18:55:05 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3402124", abstract = "Mobile web platforms are facing new demands for emerging applications, such as machine learning or augmented reality, which require significant computing powers beyond that of current mobile hardware. 
Computation offloading can accelerate these apps by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Javed:2020:EBD, author = "Amir Javed and Pete Burnap and Matthew L. Williams and Omer F. Rana", title = "Emotions Behind Drive-by Download Propagation on {Twitter}", journal = j-TWEB, volume = "14", number = "4", pages = "16:1--16:26", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3408894", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Sep 5 18:55:05 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3408894", abstract = "Twitter has emerged as one of the most popular platforms to get updates on entertainment and current events. However, due to its 280-character restriction and automatic shortening of URLs, it is continuously targeted by cybercriminals to carry out drive-by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Mittos:2020:AGT, author = "Alexandros Mittos and Savvas Zannettou and Jeremy Blackburn and Emiliano {De Cristofaro}", title = "Analyzing Genetic Testing Discourse on the {Web} Through the Lens of {Twitter}, {Reddit}, and {4chan}", journal = j-TWEB, volume = "14", number = "4", pages = "17:1--17:38", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404994", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Sep 5 18:55:05 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3404994", abstract = "Recent progress in genomics has enabled the emergence of a flourishing market for direct-to-consumer (DTC) genetic testing. 
Companies like 23andMe and AncestryDNA provide affordable health, genealogy, and ancestry reports, and have already tested tens \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zubiaga:2020:EDS, author = "Arkaitz Zubiaga and Aiqi Jiang", title = "Early Detection of Social Media Hoaxes at Scale", journal = j-TWEB, volume = "14", number = "4", pages = "18:1--18:23", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407194", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Sep 5 18:55:05 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3407194", abstract = "The unmoderated nature of social media enables the diffusion of hoaxes, which in turn jeopardises the credibility of information gathered from social media platforms. Existing research on automated detection of hoaxes has the limitation of using \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Mazumdar:2020:CSP, author = "Pramit Mazumdar and Bidyut Kr. Patra and Korra Sathya Babu", title = "Cold-start Point-of-interest Recommendation through Crowdsourcing", journal = j-TWEB, volume = "14", number = "4", pages = "19:1--19:36", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407182", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Sep 5 18:55:05 MDT 2020", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3407182", abstract = "Recommender system is a popular tool that aims to provide personalized suggestions to user about items, products, services, and so on. 
Recommender system has effectively been used in online social networks, especially the location-based social networks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Cao:2021:CCB, author = "Jian Cao and Tingjie Jia and Shiyou Qian and Haiyan Zhao and Jie Wang", title = "{CBPCS}: a Cache-block-based Service Process Caching Strategy to Accelerate the Execution of Service Processes", journal = j-TWEB, volume = "15", number = "1", pages = "1:1--1:29", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3411494", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3411494", abstract = "With the development of cloud computing and the advent of the Web 2.0 era, composing a set of Web services as a service process is becoming a common practice to provide more functional services. However, a service process involves multiple service \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Vidyapu:2021:IMW, author = "Sandeep Vidyapu and Vijaya Saradhi Vedula and Samit Bhattacharya", title = "Investigating and Modeling the {Web} Elements' Visual Feature Influence on Free-viewing Attention", journal = j-TWEB, volume = "15", number = "1", pages = "2:1--2:27", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409474", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3409474", abstract = "User attentional analyses on web elements help in synthesis and rendering of webpages. However, majority of the existing analyses are limited in incorporating the intrinsic visual features of text and images. This study aimed to analyze the influence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chen:2021:DVE, author = "Xu Chen and Jiangchao Yao and Maosen Li and Ya Zhang and Yanfeng Wang", title = "Decoupled Variational Embedding for Signed Directed Networks", journal = j-TWEB, volume = "15", number = "1", pages = "3:1--3:31", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3408298", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3408298", abstract = "Node representation learning for signed directed networks has received considerable attention in many real-world applications such as link sign prediction, node classification, and node recommendation. 
The challenge lies in how to adequately encode the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2021:ACN, author = "Wei Wang and Jiaying Liu and Tao Tang and Suppawong Tuarob and Feng Xia and Zhiguo Gong and Irwin King", title = "Attributed Collaboration Network Embedding for Academic Relationship Mining", journal = j-TWEB, volume = "15", number = "1", pages = "4:1--4:20", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409736", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3409736", abstract = "Finding both efficient and effective quantitative representations for scholars in scientific digital libraries has been a focal point of research. The unprecedented amounts of scholarly datasets, combined with contemporary machine learning and big data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Bailey:2021:SLA, author = "Shawn Bailey and Yue Zhang and Arti Ramesh and Jennifer Golbeck and Lise Getoor", title = "A Structured and Linguistic Approach to Understanding Recovery and Relapse in {AA}", journal = j-TWEB, volume = "15", number = "1", pages = "5:1--5:35", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423208", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3423208", abstract = "Alcoholism, also known as Alcohol Use Disorder (AUD), is a serious problem affecting millions of people worldwide. 
Recovery from AUD is known to be challenging and often leads to relapse at various points after enrolling in a rehabilitation program such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Gong:2021:CSP, author = "Qingyuan Gong and Yang Chen and Xinlei He and Yu Xiao and Pan Hui and Xin Wang and Xiaoming Fu", title = "Cross-site Prediction on Social Influence for Cold-start Users in Online Social Networks", journal = j-TWEB, volume = "15", number = "2", pages = "6:1--6:23", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409108", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3409108", abstract = "Online social networks (OSNs) have become a commodity in our daily life. As an important concept in sociology and viral marketing, the study of social influence has received a lot of attentions in academia. Most of the existing proposals work well on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Alhosban:2021:TPS, author = "Amal Alhosban and Zaki Malik and Khayyam Hashmi and Brahim Medjahed and Hassan Al-Ababneh", title = "A Two Phases Self-healing Framework for Service-oriented Systems", journal = j-TWEB, volume = "15", number = "2", pages = "7:1--7:25", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450443", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3450443", abstract = "Service-Oriented Architectures (SOA) enable the automatic creation of business applications from independently developed and deployed Web services. As Web services are inherently a priori unknown, how to deliver reliable Web services compositions is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Flores:2021:UWT, author = "Marcel Flores and Andrew Kahn and Marc Warrior and Alan Mislove and Aleksandar Kuzmanovic", title = "Utilizing {Web} Trackers for {Sybil} Defense", journal = j-TWEB, volume = "15", number = "2", pages = "8:1--8:19", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450444", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3450444", abstract = "User tracking has become ubiquitous practice on the Web, allowing services to recommend behaviorally targeted content to users. 
In this article, we design Alibi, a system that utilizes such readily available personalized content, generated by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Roy:2021:IAI, author = "Soumyadeep Roy and Shamik Sural and Niyati Chhaya and Anandhavelu Natarajan and Niloy Ganguly", title = "An Integrated Approach for Improving Brand Consistency of {Web} Content: Modeling, Analysis, and Recommendation", journal = j-TWEB, volume = "15", number = "2", pages = "9:1--9:25", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450445", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3450445", abstract = "A consumer-dependent (business-to-consumer) organization tends to present itself as possessing a set of human qualities, which is termed the brand personality of the company. The perception is impressed upon the consumer through the content, be it in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zhang:2021:EWD, author = "Jifeng Zhang and Wenjun Jiang and Jinrui Zhang and Jie Wu and Guojun Wang", title = "Exploring Weather Data to Predict Activity Attendance in Event-based Social Network: From the Organizer's View", journal = j-TWEB, volume = "15", number = "2", pages = "10:1--10:25", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440134", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 22 08:52:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3440134", abstract = "Event-based social networks (EBSNs) connect online and offline lives. They allow online users with similar interests to get together in real life. Attendance prediction for activities in EBSNs has attracted a lot of attention and several factors have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{White:2021:WMN, author = "Ryen W. White", title = "Welcome Message from the New {Editor-in-Chief}", journal = j-TWEB, volume = "15", number = "3", pages = "11e:1--11e:1", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3456294", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3456294", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "11e", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zhang:2021:STR, author = "Shuo Zhang and Krisztian Balog", title = "Semantic Table Retrieval Using Keyword and Table Queries", journal = j-TWEB, volume = "15", number = "3", pages = "11:1--11:33", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441690", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3441690", abstract = "Tables on the Web contain a vast amount of knowledge in a structured form. To tap into this valuable resource, we address the problem of table retrieval: answering an information need with a ranked list of tables. We investigate this problem in two \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Boschi:2021:WLW, author = "Gioia Boschi and Anthony P. Young and Sagar Joglekar and Chiara Cammarota and Nishanth Sastry", title = "Who Has the Last Word? {Understanding} How to Sample Online Discussions", journal = j-TWEB, volume = "15", number = "3", pages = "12:1--12:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3452936", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3452936", abstract = "In online debates, as in offline ones, individual utterances or arguments support or attack each other, leading to some subset of arguments (potentially from different sides of the debate) being considered more relevant than others. However, online \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Jiang:2021:RSG, author = "Wenjun Jiang and Jing Chen and Xiaofei Ding and Jie Wu and Jiawei He and Guojun Wang", title = "Review Summary Generation in Online Systems: Frameworks for Supervised and Unsupervised Scenarios", journal = j-TWEB, volume = "15", number = "3", pages = "13:1--13:33", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448015", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3448015", abstract = "In online systems, including e-commerce platforms, many users resort to the reviews or comments generated by previous consumers for decision making, while their time is limited to deal with many reviews. Therefore, a review summary, which contains all \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chelmis:2021:DIC, author = "Charalampos Chelmis and Daphney-Stavroula Zois", title = "Dynamic, Incremental, and Continuous Detection of Cyberbullying in Online Social Media", journal = j-TWEB, volume = "15", number = "3", pages = "14:1--14:33", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448014", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3448014", abstract = "The potentially detrimental effects of cyberbullying have led to the development of numerous automated, data-driven approaches, with emphasis on classification accuracy. 
Cyberbullying, as a form of abusive online behavior, although not well-defined, is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Mistry:2021:SLB, author = "Sajib Mistry and Sheik Mohammad Mostakim Fattah and Athman Bouguettaya", title = "Sequential Learning-based {IaaS} Composition", journal = j-TWEB, volume = "15", number = "3", pages = "15:1--15:37", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3452332", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:18 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3452332", abstract = "We propose a novel Infrastructure-as-a-Service composition framework that selects an optimal set of consumer requests according to the provider's qualitative preferences on long-term service provisions. Decision variables are included in the temporal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zhang:2021:SUC, author = "Peng Zhang and Baoxi Liu and Xianghua Ding and Tun Lu and Hansu Gu and Ning Gu", title = "Studying and Understanding Characteristics of Post-Syncing Practice and Goal in Social Network Sites", journal = j-TWEB, volume = "15", number = "4", pages = "16:1--16:26", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457986", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3457986", abstract = "Many popular social network sites (SNSs) provide the post-syncing functionality, which allows users to synchronize posts automatically among different SNSs. Nowadays there exists divergence on this functionality from the view of sink SNS. The key to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Parikh:2021:CSM, author = "Pulkit Parikh and Harika Abburi and Niyati Chhaya and Manish Gupta and Vasudeva Varma", title = "Categorizing Sexism and Misogyny through Neural Approaches", journal = j-TWEB, volume = "15", number = "4", pages = "17:1--17:31", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457189", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3457189", abstract = "Sexism, an injustice that subjects women and girls to enormous suffering, manifests in blatant as well as subtle ways. 
In the wake of growing documentation of experiences of sexism on the web, the automatic categorization of accounts of sexism has the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Jiao:2021:SGM, author = "Simiao Jiao and Zihui Xue and Xiaowei Chen and Yuedong Xu", title = "Sampling Graphlets of Multiplex Networks: a Restricted Random Walk Approach", journal = j-TWEB, volume = "15", number = "4", pages = "18:1--18:31", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3456291", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3456291", abstract = "Graphlets are induced subgraph patterns that are crucial to the understanding of the structure and function of a large network. A lot of effort has been devoted to calculating graphlet statistics where random walk-based approaches are commonly used to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2021:IEA, author = "Huan Wang and Chunming Qiao and Xuan Guo and Lei Fang and Ying Sha and Zhiguo Gong", title = "Identifying and Evaluating Anomalous Structural Change-based Nodes in Generalized Dynamic Social Networks", journal = j-TWEB, volume = "15", number = "4", pages = "19:1--19:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457906", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Thu Jul 15 07:11:19 MDT 2021", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3457906", abstract = "Recently, dynamic social network research has attracted a great amount of attention, especially in the area of anomaly analysis that analyzes the anomalous change in the evolution of dynamic social networks. However, most of the current research focused \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Liu:2022:FHU, author = "Bang Liu and Hanlin Zhang and Linglong Kong and Di Niu", title = "Factorizing Historical User Actions for Next-Day Purchase Prediction", journal = j-TWEB, volume = "16", number = "1", pages = "1:1--1:26", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3468227", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 7 08:00:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3468227", abstract = "It is common practice for many large e-commerce operators to analyze daily logged transaction data to predict customer purchase behavior, which may potentially lead to more effective recommendations and increased sales. 
Traditional recommendation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zhao:2022:CAD, author = "Yiji Zhao and Youfang Lin and Zhihao Wu and Yang Wang and Haomin Wen", title = "Context-aware Distance Measures for Dynamic Networks", journal = j-TWEB, volume = "16", number = "1", pages = "2:1--2:34", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3476228", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 7 08:00:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3476228", abstract = "Dynamic networks are widely used in the social, physical, and biological sciences as a concise mathematical representation of the evolving interactions in dynamic complex systems. Measuring distances between network snapshots is important for analyzing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Crichton:2022:HDH, author = "Kyle Crichton and Nicolas Christin and Lorrie Faith Cranor", title = "How Do Home Computer Users Browse the Web?", journal = j-TWEB, volume = "16", number = "1", pages = "3:1--3:27", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473343", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 7 08:00:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3473343", abstract = "With the ubiquity of web tracking, information on how people navigate the internet is abundantly collected yet, due to its proprietary nature, rarely distributed. 
As a result, our understanding of user browsing primarily derives from small-scale studies \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Andriamilanto:2022:LSE, author = "Nampoina Andriamilanto and Tristan Allard and Ga{\"e}tan {Le Guelvouit} and Alexandre Garel", title = "A Large-scale Empirical Analysis of Browser Fingerprints Properties for Web Authentication", journal = j-TWEB, volume = "16", number = "1", pages = "4:1--4:62", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3478026", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 7 08:00:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3478026", abstract = "Modern browsers give access to several attributes that can be collected to form a browser fingerprint. Although browser fingerprints have primarily been studied as a web tracking tool, they can contribute to improve the current state of web security by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Poiitis:2022:ADM, author = "Marinos Poiitis and Athena Vakali and Nicolas Kourtellis", title = "On the Aggression Diffusion Modeling and Minimization in {Twitter}", journal = j-TWEB, volume = "16", number = "1", pages = "5:1--5:24", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3486218", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Jan 7 08:00:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3486218", abstract = "Aggression in online social networks has been studied mostly from the perspective of machine learning, which detects such behavior in a static context. However, the way aggression diffuses in the network has received little attention as it embeds modeling \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Nelson:2022:QCT, author = "Michael Nelson and Sridhar Radhakrishnan and Chandra Sekharan and Amlan Chatterjee and Sudhindra Gopal Krishna", title = "Queryable Compression on Time-evolving {Web} and Social Networks with Streaming", journal = j-TWEB, volume = "16", number = "2", pages = "6:1--6:21", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3495012", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 21 12:32:34 MDT 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3495012", abstract = "Time-evolving web and social network graphs are modeled as a set of pages/individuals (nodes) and their arcs (links/relationships) that change over time. 
Due to their popularity, they have become increasingly massive in terms of their number of nodes, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2022:LSE, author = "Kai Wang and Jun Pang and Dingjie Chen and Yu Zhao and Dapeng Huang and Chen Chen and Weili Han", title = "A Large-scale Empirical Analysis of Ransomware Activities in Bitcoin", journal = j-TWEB, volume = "16", number = "2", pages = "7:1--7:29", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494557", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 21 12:32:34 MDT 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/bitcoin.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3494557", abstract = "Exploiting the anonymous mechanism of Bitcoin, ransomware activities demanding ransom in bitcoins have become rampant in recent years. Several existing studies quantify the impact of ransomware activities, mostly focusing on the amount of ransom. However, victims' reactions in Bitcoin that can well reflect the impact of ransomware activities are somehow largely neglected. Besides, existing studies track ransom transfers at the Bitcoin address level, making it difficult for them to uncover the patterns of ransom transfers from a macro perspective beyond Bitcoin addresses.\par In this article, we conduct a large-scale analysis of ransom payments, ransom transfers, and victim migrations in Bitcoin from 2012 to 2021. First, we develop a fine-grained address clustering method to cluster Bitcoin addresses into users, which enables us to identify more addresses controlled by ransomware criminals. 
Second, motivated by the fact that Bitcoin activities and their participants already formed stable industries, such as Darknet and Miner, we train a multi-label classification model to identify the industry identifiers of users. Third, we identify ransom payment transactions and then quantify the amount of ransom and the number of victims in 63 ransomware activities. Finally, after we analyze the trajectories of ransom transferred across different industries and track victims' migrations across industries, we find out that to obscure the purposes of their transfer trajectories, most ransomware criminals (e.g., operators of Locky and Wannacry) prefer to spread ransom into multiple industries instead of utilizing the services of Bitcoin mixers. Compared with other industries, Investment is highly resilient to ransomware activities in the sense that the number of users in Investment remains relatively stable. Moreover, we also observe that a few victims become active in the Darknet after paying ransom. Our findings in this work can help authorities deeply understand ransomware activities in Bitcoin. While our study focuses on ransomware, our methods are potentially applicable to other cybercriminal activities that have similarly adopted bitcoins as their payments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Biswas:2022:TFR, author = "Arpita Biswas and Gourab K. Patro and Niloy Ganguly and Krishna P. 
Gummadi and Abhijnan Chakraborty", title = "Toward Fair Recommendation in Two-sided Platforms", journal = j-TWEB, volume = "16", number = "2", pages = "8:1--8:34", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3503624", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 21 12:32:34 MDT 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3503624", abstract = "Many online platforms today (such as Amazon, Netflix, Spotify, LinkedIn, and AirBnB) can be thought of as two-sided markets with producers and customers of goods and services. Traditionally, recommendation services in these platforms have focused on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Magno:2022:MIO, author = "Gabriel Magno and Virgilio Almeida", title = "Measuring International Online Human Values with Word Embeddings", journal = j-TWEB, volume = "16", number = "2", pages = "9:1--9:38", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501306", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 21 12:32:34 MDT 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3501306", abstract = "As the Internet grows in number of users and in the diversity of services, it becomes more influential on people's lives. It has the potential of constructing or modifying the opinion, the mental perception, and the values of individuals. What is being \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Karanatsiou:2022:MTB, author = "Dimitra Karanatsiou and Pavlos Sermpezis and Dritjon Gruda and Konstantinos Kafetsios and Ilias Dimitriadis and Athena Vakali", title = "My Tweets Bring All the Traits to the Yard: Predicting Personality and Relational Traits in Online Social Networks", journal = j-TWEB, volume = "16", number = "2", pages = "10:1--10:26", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3523749", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat May 21 12:32:34 MDT 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3523749", abstract = "Users in Online Social Networks (OSNs) leave traces that reflect their personality characteristics. The study of these traces is important for several fields, such as social science, psychology, marketing, and others. Despite a marked increase in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Qian:2022:PVR, author = "Xin Qian and Ryan A. Rossi and Fan Du and Sungchul Kim and Eunyee Koh and Sana Malik and Tak Yeon Lee and Nesreen K.
Ahmed", title = "Personalized Visualization Recommendation", journal = j-TWEB, volume = "16", number = "3", pages = "11:1--11:??", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3538703", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Nov 16 08:39:27 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3538703", abstract = "Visualization recommendation work has focused solely on scoring visualizations based on the underlying dataset, and not the actual user and their past visualization feedback. These systems recommend the same visualizations for every user, despite that the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Raponi:2022:FNP, author = "Simone Raponi and Zeinab Khalifa and Gabriele Oligeri and Roberto {Di Pietro}", title = "Fake News Propagation: a Review of Epidemic Models, Datasets, and Insights", journal = j-TWEB, volume = "16", number = "3", pages = "12:1--12:??", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3522756", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Nov 16 08:39:27 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3522756", abstract = "Fake news propagation is a complex phenomenon influenced by a multitude of factors whose identification and impact assessment is challenging. Although many models have been proposed in the literature, the one capturing all the properties of a real fake-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Squicciarini:2022:EUG, author = "Anna Squicciarini and Sarah Rajtmajer and Yang Gao and Justin Semonsen and Andrew Belmonte and Pratik Agarwal", title = "An Extended Ultimatum Game for Multi-Party Access Control in Social Networks", journal = j-TWEB, volume = "16", number = "3", pages = "13:1--13:??", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3555351", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Nov 16 08:39:27 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3555351", abstract = "In this article, we aim to answer an important set of questions about the potential longitudinal effects of repeated sharing and privacy settings decisions over jointly managed content among users in a social network. We model user interactions through a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Oswald:2022:SIA, author = "C. Oswald and Sona Elza Simon and Arnab Bhattacharya", title = "{SpotSpam}: Intention Analysis-driven {SMS} Spam Detection Using {BERT} Embeddings", journal = j-TWEB, volume = "16", number = "3", pages = "14:1--14:??", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3538491", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Nov 16 08:39:27 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3538491", abstract = "Short Message Service (SMS) is one of the widely used mobile applications for global communication for personal and business purposes. 
Its widespread use for customer interaction, business updates, and reminders has made it a billion-dollar industry in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Jha:2022:IPP, author = "Nikhil Jha and Martino Trevisan and Luca Vassio and Marco Mellia", title = "The {Internet} with Privacy Policies: Measuring The {Web} Upon Consent", journal = j-TWEB, volume = "16", number = "3", pages = "15:1--15:??", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3555352", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Wed Nov 16 08:39:27 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3555352", abstract = "To protect user privacy, legislators have regulated the use of tracking technologies, mandating the acquisition of users' consent before collecting data. As a result, websites started showing more and more consent management modules-i.e., Consent Banners-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Frosini:2022:OTF, author = "Riccardo Frosini and Alexandra Poulovassilis and Peter T. 
Wood and Andrea Cal{\'\i}", title = "Optimisation Techniques for Flexible {SPARQL} Queries", journal = j-TWEB, volume = "16", number = "4", pages = "16:1--16:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532855", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532855", abstract = "Resource Description Framework datasets can be queried using the SPARQL language but are often irregularly structured and incomplete, which may make precise query formulation hard for users. The SPARQL$^{AR}$ language extends SPARQL 1.1 with two operators-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chaqfeh:2022:JWD, author = "Moumena Chaqfeh and Russell Coke and Jacinta Hu and Waleed Hashmi and Lakshmi Subramanian and Talal Rahwan and Yasir Zaki", title = "\pkg{JSAnalyzer}: a {Web} Developer Tool for Simplifying Mobile {Web} Pages through Non-critical {JavaScript} Elimination", journal = j-TWEB, volume = "16", number = "4", pages = "17:1--17:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3550358", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/java2020.bib; http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3550358", abstract = "The amount of JavaScript used in web pages has substantially grown in the past decade, leading to large and complex pages that are computationally intensive for handheld mobile devices. Due to the increasing usage of these devices to access today's web, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Upadhyaya:2022:SFV, author = "Apoorva Upadhyaya and Joydeep Chandra", title = "Spotting Flares: The Vital Signs of the Viral Spread of Tweets Made During Communal Incidents", journal = j-TWEB, volume = "16", number = "4", pages = "18:1--18:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3550357", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3550357", abstract = "With the increasing use of Twitter for encouraging users to instigate violent behavior with hate and racial content, it becomes necessary to investigate the uniqueness in the dynamics of the spread of tweets made during violent communal incidents and the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chen:2022:DAC, author = "Qi Chen and Guohui Li and Quan Zhou and Si Shi and Deqing Zou", title = "Double Attention Convolutional Neural Network for Sequential Recommendation", journal = j-TWEB, volume = "16", number = "4", pages = "19:1--19:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3555350", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3555350", abstract = "The explosive growth of e-commerce and online service has led to the development of recommender system. 
Aiming to provide a list of items to meet a user's personalized need by analyzing his/her interaction history, recommender system has been widely \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2022:BBS, author = "Xi Wang and Iadh Ounis and Craig Macdonald", title = "\pkg{BanditProp}: Bandit Selection of Review Properties for Effective Recommendation", journal = j-TWEB, volume = "16", number = "4", pages = "20:1--20:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532859", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532859", abstract = "Many recent recommendation systems leverage the large quantity of reviews placed by users on items. However, it is both challenging and important to accurately measure the usefulness of such reviews for effective recommendation. In particular, users have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Moon:2022:MME, author = "Taegeun Moon and Hyoungshick Kim and Sangwon Hyun", title = "\pkg{Mutexion}: Mutually Exclusive Compression System for Mitigating Compression Side-Channel Attacks", journal = j-TWEB, volume = "16", number = "4", pages = "21:1--21:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532850", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Fri Dec 9 06:51:15 MST 2022", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532850", abstract = "To enhance the performance of web services, web servers often compress data to be delivered. Unfortunately, the data compression technique has also introduced a side effect called compression side-channel attacks (CSCA). CSCA allows eavesdroppers to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Haider:2023:CLV, author = "Waqar Haider and Yeliz Yesilada", title = "Classification of Layout vs. Relational Tables on the {Web}: Machine Learning with Rendered Pages", journal = j-TWEB, volume = "17", number = "1", pages = "1:1--1:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555349", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3555349", abstract = "Table mining on the web is an open problem, and none of the previously proposed techniques provides a complete solution. 
Most research focuses on the structure of the HTML document, but because of the nature and structure of the web, it is still a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "1", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Xiao:2023:DKE, author = "Yunming Xiao and Matteo Varvello and Marc Warrior and Aleksandar Kuzmanovic", title = "Decoding the {Kodi} Ecosystem", journal = j-TWEB, volume = "17", number = "1", pages = "2:1--2:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563700", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3563700", abstract = "Free and open-source media centers are experiencing a boom in popularity for the convenience they offer users seeking to remotely consume digital content. Kodi is today's most popular home media center, with millions of users worldwide. Kodi's popularity derives from its ability to centralize the sheer amount of media content available on the Web, both free and copyrighted. Researchers have been hinting at potential security concerns around Kodi, due to add-ons injecting unwanted content as well as user settings linked with security holes. Motivated by these observations, this article conducts the first comprehensive analysis of the Kodi ecosystem: 15,000 Kodi users from 104 countries, 11,000 unique add-ons, and data collected over 9 months. Our work makes three important contributions. Our first contribution is that we build ``crawling'' software (de-Kodi) which can automatically install a Kodi add-on, explore its menu, and locate (video) content. This is challenging for two main reasons. First, Kodi largely relies on visual information and user input which intrinsically complicates automation. 
Second, the potential sheer size of this ecosystem (i.e., the number of available add-ons) requires a highly scalable crawling solution. Our second contribution is that we develop a solution to discover Kodi add-ons. Our solution combines Web crawling of popular websites where Kodi add-ons are published (LazyKodi and GitHub) and SafeKodi, a Kodi add-on we have developed which leverages the help of Kodi users to learn which add-ons are used in the wild and, in return, offers information about how safe these add-ons are, e.g., do they track user activity or contact sketchy URLs/IP addresses. Our third contribution is a classifier to passively detect Kodi traffic and add-on usage in the wild. Our analysis of the Kodi ecosystem reveals the following findings. We find that most installed add-ons are unofficial but safe to use. Still, 78\% of the users have installed at least one unsafe add-on, and even worse, such add-ons are among the most popular. In response to the information offered by SafeKodi, one-third of the users reacted by disabling some of their add-ons. However, the majority of users ignored our warnings for several months attracted by the content such unsafe add-ons have to offer. Last but not least, we show that Kodi's auto-update, a feature active for 97.6\% of SafeKodi users, makes Kodi users easily identifiable by their ISPs. While passively identifying which Kodi add-on is in use is, as expected, much harder, we also find that many unofficial add-ons do not use HTTPS yet, making their passive detection straightforward.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "2", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2023:CPS, author = "Xiao Wang and Craig MacDonald and Nicola Tonellotto and Iadh Ounis", title = "{ColBERT-PRF}: Semantic Pseudo-Relevance Feedback for Dense Passage and Document Retrieval", journal = j-TWEB, volume = "17", number = "1", pages = "3:1--3:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572405", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3572405", abstract = "Pseudo-relevance feedback mechanisms, from Rocchio to the relevance models, have shown the usefulness of expanding and reweighting the users' initial queries using information occurring in an initial set of retrieved documents, known as the pseudo-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "3", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zou:2023:PTL, author = "Lixin Zou and Weixue Lu and Yiding Liu and Hengyi Cai and Xiaokai Chu and Dehong Ma and Daiting Shi and Yu Sun and Zhicong Cheng and Simiu Gu and Shuaiqiang Wang and Dawei Yin", title = "Pre-trained Language Model-based Retrieval and Ranking for {Web} Search", journal = j-TWEB, volume = "17", number = "1", pages = "4:1--4:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568681", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3568681", abstract = "Pre-trained language representation models (PLMs) such as BERT and Enhanced Representation through kNowledge IntEgration (ERNIE) have been integral to achieving recent improvements on various downstream tasks, including information retrieval. However, it \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "4", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chen:2023:KED, author = "Lei Chen and Jie Cao and Weichao Liang and Jia Wu and Qiaolin Ye", title = "Keywords-enhanced Deep Reinforcement Learning Model for Travel Recommendation", journal = j-TWEB, volume = "17", number = "1", pages = "5:1--5:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570959", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3570959", abstract = "Tourism is an important industry and a popular entertainment activity involving billions of visitors per annum. 
One challenging problem tourists face is identifying satisfactory products from vast tourism information. Most of travel recommendation methods \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "5", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2023:ECR, author = "Yingxu Wang and Xiaoru Chen and Jinyuan Fang and Zaiqiao Meng and Shangsong Liang", title = "Enhancing Conversational Recommendation Systems with Representation Fusion", journal = j-TWEB, volume = "17", number = "1", pages = "6:1--6:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577034", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:44 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3577034", abstract = "Conversational Recommendation Systems (CRSs) aim to improve recommendation performance by utilizing information from a conversation session. A CRS first constructs questions and then asks users for their feedback in each conversation session to refine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "6", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Chen:2023:FWO, author = "Chung-Chi Chen and Hen-Hsen Huang and Hiroya Takamura and Makoto P. 
Kato and Yu-Lieh Huang", title = "{FinTech} on the {Web}: an Overview", journal = j-TWEB, volume = "17", number = "2", pages = "7:1--7:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572404", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3572404", abstract = "In this article, we provide an overview of ACM TWEB's special issue, Financial Technology on the Web. This special issue covers diverse topics: (1) a new architecture for leveraging online news to investment and risk management, (2) a cross-platform \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "7", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Ang:2023:IRM, author = "Gary Ang and Ee-Peng Lim", title = "Investment and Risk Management with Online News and Heterogeneous Networks", journal = j-TWEB, volume = "17", number = "2", pages = "8:1--8:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532858", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532858", abstract = "Stock price movements in financial markets are influenced by large volumes of news from diverse sources on the web, e.g., online news outlets, blogs, social media. Extracting useful information from online news for financial tasks, e.g., forecasting stock \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "8", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Bouadjenek:2023:UCA, author = "Mohamed Reda Bouadjenek and Scott Sanner and Ga Wu", title = "A User-Centric Analysis of Social Media for Stock Market Prediction", journal = j-TWEB, volume = "17", number = "2", pages = "9:1--9:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532856", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532856", abstract = "Social media platforms such as Twitter or StockTwits are widely used for sharing stock market opinions between investors, traders, and entrepreneurs. Empirically, previous work has shown that the content posted on these social media platforms can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "9", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Kitzler:2023:DDF, author = "Stefan Kitzler and Friedhelm Victor and Pietro Saggese and Bernhard Haslhofer", title = "Disentangling Decentralized Finance {(DeFi)} Compositions", journal = j-TWEB, volume = "17", number = "2", pages = "10:1--10:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532857", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3532857", abstract = "We present a measurement study on compositions of Decentralized Finance (DeFi) protocols, which aim to disrupt traditional finance and offer services on top of distributed ledgers, such as Ethereum. 
Understanding DeFi compositions is of great importance, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "10", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Piccardi:2023:LSC, author = "Tiziano Piccardi and Martin Gerlach and Akhil Arora and Robert West", title = "A Large-Scale Characterization of How Readers Browse {Wikipedia}", journal = j-TWEB, volume = "17", number = "2", pages = "11:1--11:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580318", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580318", abstract = "Despite the importance and pervasiveness of Wikipedia as one of the largest platforms for open knowledge, surprisingly little is known about how people navigate its content when seeking information. To bridge this gap, we present the first systematic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "11", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Sun:2023:ICW, author = "Chang-Ai Sun and An Fu and Jingting Jia and Meng Li and Jun Han", title = "Improving Conformance of {Web} Services: a Constraint-based Model-driven Approach", journal = j-TWEB, volume = "17", number = "2", pages = "12:1--12:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580515", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580515", abstract = "Web services have been widely used to develop complex distributed software systems in the context of Service Oriented Architecture (SOA). 
As a standard for describing Web services, the Web Service Description Language (WSDL) provides a universal mechanism \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "12", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Jain:2023:OLI, author = "Lokesh Jain and Rahul Katarya and Shelly Sachdeva", title = "Opinion Leaders for Information Diffusion Using Graph Neural Network in Online Social Networks", journal = j-TWEB, volume = "17", number = "2", pages = "13:1--13:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580516", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Mon Apr 17 18:10:45 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580516", abstract = "Various opportunities are available to depict different domains due to the diverse nature of social networks and researchers' insatiable. An opinion leader is a human entity or cluster of people who can redirect human assessment strategy by intellectual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "13", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Peng:2023:ISI, author = "Hao Peng and Jian Yang and Jia Wu and Philip S. Yu", title = "Introduction to the Special Issue on Advanced Graph Mining on the {Web}: Theory, Algorithms, and Applications: {Part 1}", journal = j-TWEB, volume = "17", number = "3", pages = "14:1--14:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579360", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3579360", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "14", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Yang:2023:RAS, author = "Yingguang Yang and Renyu Yang and Yangyang Li and Kai Cui and Zhiqin Yang and Yue Wang and Jie Xu and Haiyong Xie", title = "{RoSGAS}: Adaptive Social Bot Detection with Reinforced Self-supervised {GNN} Architecture Search", journal = j-TWEB, volume = "17", number = "3", pages = "15:1--15:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572403", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3572403", abstract = "Social bots are referred to as the automated accounts on social networks that make attempts to behave like humans. While Graph Neural Networks (GNNs) have been massively applied to the field of social bot detection, a huge amount of domain expertise and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "15", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Du:2023:NRT, author = "Haohua Du and Yue Wang and Xiaoya Xu and Mingsheng Liu", title = "{Niffler}: Real-time Device-level Anomalies Detection in Smart Home", journal = j-TWEB, volume = "17", number = "3", pages = "16:1--16:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586073", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3586073", abstract = "Device-level security has become a major concern in smart home systems. Detecting problems in smart home systems strives to increase accuracy in near real time without hampering the regular tasks of the smart home. 
The current state of the art in detecting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "16", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Sun:2023:GDR, author = "Li Sun and Yang Du and Shuai Gao and Junda Ye and Feiyang Wang and Fuxin Ren and Mingchen Liang and Yue Wang and Shuhai Wang", title = "{GroupAligner}: a Deep Reinforcement Learning with Domain Adaptation for Social Group Alignment", journal = j-TWEB, volume = "17", number = "3", pages = "17:1--17:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580509", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580509", abstract = "Social network alignment, which aims to uncover the correspondence across different social networks, shows fundamental importance in a wide spectrum of applications such as cross-domain recommendation and information propagation. In the literature, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "17", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Zhu:2023:MTG, author = "Guixiang Zhu and Jie Cao and Lei Chen and Youquan Wang and Zhan Bu and Shuxin Yang and Jianqing Wu and Zhiping Wang", title = "A Multi-Task Graph Neural Network with Variational Graph Auto-Encoders for Session-Based Travel Packages Recommendation", journal = j-TWEB, volume = "17", number = "3", pages = "18:1--18:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577032", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3577032", abstract = "Session-based travel packages recommendation aims to predict users' next click based on their current and historical sessions recorded by Online Travel Agencies (OTAs). Recently, an increasing number of studies attempted to apply Graph Neural Networks (. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "18", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Ahmed:2023:GAN, author = "Usman Ahmed and Jerry Chun-Wei Lin and Gautam Srivastava", title = "Graph Attention Network for Text Classification and Detection of Mental Disorder", journal = j-TWEB, volume = "17", number = "3", pages = "19:1--19:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572406", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3572406", abstract = "A serious issue in today's society is Depression, which can have a devastating impact on a person's ability to cope in daily life. 
Numerous studies have examined the use of data generated directly from users using social media to diagnose and detect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "19", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Li:2023:TIU, author = "Qian Li and Jianxin Li and Lihong Wang and Cheng Ji and Yiming Hei and Jiawei Sheng and Qingyun Sun and Shan Xue and Pengtao Xie", title = "Type Information Utilized Event Detection via Multi-Channel {GNNs} in Electrical Power Systems", journal = j-TWEB, volume = "17", number = "3", pages = "20:1--20:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577031", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3577031", abstract = "Event detection in power systems aims to identify triggers and event types, which helps relevant personnel respond to emergencies promptly and facilitates the optimization of power supply strategies. However, the limited length of short electrical record \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "20", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Wang:2023:HGT, author = "Shuhai Wang and Xin Liu and Xiao Pan and Hanjie Xu and Mingrui Liu", title = "Heterogeneous Graph Transformer for Meta-structure Learning with Application in Text Classification", journal = j-TWEB, volume = "17", number = "3", pages = "21:1--21:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580508", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580508", abstract = "The prevalent heterogeneous Graph Neural Network (GNN) models learn node and graph representations using pre-defined meta-paths or only automatically discovering meta-paths. However, the existing methods suffer from information loss due to neglecting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Web", articleno = "21", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Gong:2023:RMC, author = "Jibing Gong and Yao Wan and Ye Liu and Xuewen Li and Yi Zhao and Cheng Wang and Yuting Lin and Xiaohan Fang and Wenzheng Feng and Jingyi Zhang and Jie Tang", title = "Reinforced {MOOCs} Concept Recommendation in Heterogeneous Information Networks", journal = j-TWEB, volume = "17", number = "3", pages = "22:1--22:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580510", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580510", abstract = "Massive open online courses (MOOCs), which offer open access and widespread interactive participation through the internet, are quickly becoming the preferred method for online and remote learning. Several MOOC platforms offer the service of course \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "22", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Shang:2023:CST, author = "Zhihua Shang and Hongtao Xie and Lingyun Yu and Zhengjun Zha and Yongdong Zhang", title = "Constructing Spatio-Temporal Graphs for Face Forgery Detection", journal = j-TWEB, volume = "17", number = "3", pages = "23:1--23:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580512", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3580512", abstract = "Recently, advanced development of facial manipulation techniques threatens web information security, thus, face forgery detection attracts a lot of attention. 
It is clear that both spatial and temporal information of facial videos contains the crucial \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "23", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", } @Article{Sheshbolouki:2023:SES, author = "Aida Sheshbolouki and M. Tamer {\"O}zsu", title = "{sGrow}: Explaining the Scale-Invariant Strength Assortativity of Streaming Butterflies", journal = j-TWEB, volume = "17", number = "3", pages = "24:1--24:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572408", ISSN = "1559-1131 (print), 1559-114X (electronic)", ISSN-L = "1559-1131", bibdate = "Sat Aug 19 07:32:23 MDT 2023", bibsource = "http://www.math.utah.edu/pub/tex/bib/tweb.bib", URL = "https://dl.acm.org/doi/10.1145/3572408", abstract = "Bipartite graphs are rich data structures with prevalent applications and characteristic structural features. However, less is known about their growth patterns, particularly in streaming settings. Current works study the patterns of static or aggregated \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Web", articleno = "24", fjournal = "ACM Transactions on the Web (TWEB)", journal-URL = "https://dl.acm.org/loi/tweb", }