%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.00", %%% date = "16 June 2017", %%% time = "09:35:37 MST", %%% filename = "pomacs.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% FAX = "+1 801 581 4148", %%% URL = "http://www.math.utah.edu/~beebe", %%% checksum = "22676 1549 8942 82901", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; Proceedings of the ACM %%% on Measurement and Analysis of Computing %%% Systems (POMACS)", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% Proceedings of the ACM on Measurement and %%% Analysis of Computing Systems (POMACS) (CODEN %%% ????, ISSN 2476-1249). The journal appears %%% annually, and publication began with volume %%% 1, number 1, in June 2017. %%% %%% At version 1.00, the COMPLETE journal %%% coverage looked like this: %%% %%% 2017 ( 27) %%% %%% Article: 27 %%% %%% Total entries: 27 %%% %%% The journal Web pages can be found at: %%% %%% http://pomacs.acm.org/ %%% http://pomacs.acm.org/archive-toc.cfm %%% %%% The journal table of contents page is at: %%% %%% http://dl.acm.org/pub.cfm?id=J1567 %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. %%% %%% The initial draft was extracted from the ACM %%% Web pages. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. Article reviews have been %%% omitted, until their copyright status has %%% been clarified. 
%%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ====================================================================

%%% The preamble is emitted verbatim into the generated .bbl: it loads
%%% Beebe's shared author-name macro file (bibnames.sty) and defines a
%%% \TM macro (superscript small-caps trademark mark) for use in fields.
@Preamble{"\input bibnames.sty" # "\def \TM {${}^{\sc TM}$}" }

%%% ==================================================================== %%% Acknowledgement abbreviations:

%%% Acknowledgement string referenced by every entry below via
%%% ``acknowledgement = ack-nhfb''; full contact block for the
%%% bibliography maintainer.
@String{ack-nhfb= "Nelson H. F. Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, FAX: +1 801 581 4148, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ==================================================================== %%% Journal abbreviations:

%%% Journal-name abbreviation used as ``journal = j-POMACS'' in every
%%% entry; the expanded form is duplicated in each entry's fjournal
%%% field for styles that ignore @String macros.
@String{j-POMACS= "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)"}

%%% ==================================================================== %%% Bibliography entries:

@Article{Chaintreau:2017:E, author = "Augustin Chaintreau and Leana Golubchik and Zhi-Li Zhang", title = "Editorial", journal = j-POMACS, volume = "1", number = "1", pages = "1:1--1:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3105875", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3105875", acknowledgement = ack-nhfb, articleno = "1", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Gibbens:2017:HND, author = "Mathias Gibbens and Chris Gniady and Lei Ye and Beichuan Zhang", title = "{Hadoop} on Named Data Networking: Experience and Results", journal = j-POMACS, volume = "1", number = "1", pages = "2:1--2:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084439", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084439", abstract = "The Named Data Networking (NDN) architecture retrieves content by names rather than connecting to specific hosts. It provides benefits such as highly efficient and resilient content distribution, which fit well to data-intensive distributed computing. This paper presents and discusses our experience in modifying Apache Hadoop, a popular MapReduce framework, to operate on an NDN network. Through this first-of-its-kind implementation process, we demonstrate the feasibility of running an existing, large, and complex piece of distributed software commonly seen in data centers over NDN. We show advantages such as simplified network code and reduced network traffic which are beneficial in a data center environment. 
There are also challenges faced by NDN, that are being addressed by the community, which can be magnified under data center traffic. Through detailed evaluation, we show a reduction of 16\% for overall data transmission between Hadoop nodes while writing data with default replication settings. Preliminary results also show promise for in-network caching of repeated reads in distributed applications. We also show that overall performance is currently slower under NDN, and we identify challenges and opportunities for further NDN improvements.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Gong:2017:QPS, author = "Long Gong and Paul Tune and Liang Liu and Sen Yang and Jun (Jim) Xu", title = "Queue-Proportional Sampling: A Better Approach to Crossbar Scheduling for Input-Queued Switches", journal = j-POMACS, volume = "1", number = "1", pages = "3:1--3:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084440", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084440", abstract = "Most present day switching systems, in Internet routers and data-center switches, employ a single input-queued crossbar to interconnect input ports with output ports. Such switches need to compute a matching, between input and output ports, for each switching cycle (time slot). The main challenge in designing such matching algorithms is to deal with the unfortunate tradeoff between the quality of the computed matching and the computational complexity of the algorithm. In this paper, we propose a general approach that can significantly boost the performance of both SERENA and iSLIP, yet incurs only O(1) additional computational complexity at each input/output port. 
Our approach is a novel proposing strategy, called Queue-Proportional Sampling (QPS), that generates an excellent starter matching. We show, through rigorous simulations, that when starting with this starter matching, iSLIP and SERENA can output much better final matching decisions, as measured by the resulting throughput and delay performance, than they otherwise can.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Quach:2017:ILT, author = "Alan Quach and Zhongjie Wang and Zhiyun Qian", title = "Investigation of the 2016 {Linux TCP} Stack Vulnerability at Scale", journal = j-POMACS, volume = "1", number = "1", pages = "4:1--4:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084441", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084441", abstract = "To combat blind in-window attacks against TCP, changes proposed in RFC 5961 have been implemented by Linux since late 2012. While successfully eliminating the old vulnerabilities, the new TCP implementation was reported in August 2016 to have introduced a subtle yet serious security flaw. Assigned CVE-2016-5696, the flaw exploits the challenge ACK rate limiting feature that could allow an off-path attacker to infer the presence/absence of a TCP connection between two arbitrary hosts, terminate such a connection, and even inject payload into an unsecured TCP connection. In this work, we perform a comprehensive measurement of the impact of the new vulnerability. This includes (1) tracking the vulnerable Internet servers, (2) monitoring the patch behavior over time, (3) picturing the overall security status of TCP stacks at scale. 
Towards this goal, we design a scalable measurement methodology to scan the Alexa top 1 million websites for almost 6 months. We also present how notifications impact the patching behavior, and compare the result with the Heartbleed and the Debian PRNG vulnerability. The measurement represents a valuable data point in understanding how Internet servers react to serious security flaws in the operating system kernel.", acknowledgement = ack-nhfb, articleno = "4", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Sharma:2017:PDR, author = "Prateek Sharma and David Irwin and Prashant Shenoy", title = "Portfolio-driven Resource Management for Transient Cloud Servers", journal = j-POMACS, volume = "1", number = "1", pages = "5:1--5:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084442", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib; http://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dl.acm.org/citation.cfm?id=3084442", abstract = "Cloud providers have begun to offer their surplus capacity in the form of low-cost transient servers, which can be revoked unilaterally at any time. While the low cost of transient servers makes them attractive for a wide range of applications, such as data processing and scientific computing, failures due to server revocation can severely degrade application performance. Since different transient server types offer different cost and availability tradeoffs, we present the notion of server portfolios that is based on financial portfolio modeling. Server portfolios enable construction of an 'optimal' mix of severs to meet an application's sensitivity to cost and revocation risk. 
We implement model-driven portfolios in a system called ExoSphere, and show how diverse applications can use portfolios and application-specific policies to gracefully handle transient servers. We show that ExoSphere enables widely-used parallel applications such as Spark, MPI, and BOINC to be made transiency-aware with modest effort. Our experiments show that allowing the applications to use suitable transiency-aware policies, ExoSphere is able to achieve 80\% cost savings when compared to on-demand servers and greatly reduces revocation risk compared to existing approaches.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Cao:2017:DEC, author = "Yi Cao and Javad Nejati and Muhammad Wajahat and Aruna Balasubramanian and Anshul Gandhi", title = "Deconstructing the Energy Consumption of the Mobile Page Load", journal = j-POMACS, volume = "1", number = "1", pages = "6:1--6:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084443", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084443", abstract = "Modeling the energy consumption of applications on mobile devices is an important topic that has received much attention in recent years. However, there has been very little research on modeling the energy consumption of the mobile Web. This is primarily due to the short-lived yet complex page load process that makes it infeasible to rely on coarse-grained resource monitoring for accurate power estimation. We present RECON, a modeling approach that accurately estimates the energy consumption of any Web page load and deconstructs it into the energy contributions of individual page load activities. 
Our key intuition is to leverage low-level application semantics in addition to coarse-grained resource utilizations for modeling the page load energy consumption. By exploiting fine-grained information about the individual activities that make up the page load, RECON enables fast and accurate energy estimations without requiring complex models. Experiments across 80 Web pages and under four different optimizations show that RECON can estimate the energy consumption for a Web page load with an average error of less than 7\%. Importantly, RECON helps to analyze and explain the energy effects of an optimization on the individual components of Web page loads.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Wang:2017:OTS, author = "Xin Wang and Richard T. B. Ma and Yinlong Xu", title = "On Optimal Two-Sided Pricing of Congested Networks", journal = j-POMACS, volume = "1", number = "1", pages = "7:1--7:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084444", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084444", abstract = "Traditionally, Internet Access Providers (APs) only charge end-users for Internet access services; however, to recoup infrastructure costs and increase revenues, some APs have recently adopted two-sided pricing schemes under which both end-users and content providers are charged. Meanwhile, with the rapid growth of traffic, network congestion could seriously degrade user experiences and influence providers' utility. To optimize profit and social welfare, APs and regulators need to design appropriate pricing strategies and regulatory policies that take the effects of network congestion into consideration. 
In this paper, we model two-sided networks under which users' traffic demands are influenced by exogenous pricing and endogenous congestion parameters and derive the system congestion under an equilibrium. We characterize the structures and sensitivities of profit- and welfare-optimal two-sided pricing schemes and reveal that (1) the elasticity of system throughput plays a crucial role in determining the structures of optimal pricing, (2) the changes of optimal pricing under varying AP's capacity and users' congestion sensitivity are largely driven by the type of data traffic, e.g., text or video, and (3) APs and regulators will be incentivized to shift from one-sided to two-sided pricing when APs' capacities and user demand for video traffic grow. Our results can help APs design optimal two-sided pricing and guide regulators to legislate desirable policies.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Casale:2017:API, author = "Giuliano Casale", title = "Accelerating Performance Inference over Closed Systems by Asymptotic Methods", journal = j-POMACS, volume = "1", number = "1", pages = "8:1--8:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084445", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084445", abstract = "Recent years have seen a rapid growth of interest in exploiting monitoring data collected from enterprise applications for automated management and performance analysis. In spite of this trend, even simple performance inference problems involving queueing theoretic formulas often incur computational bottlenecks, for example upon computing likelihoods in models of batch systems. 
Motivated by this issue, we revisit the solution of multiclass closed queueing networks, which are popular models used to describe batch and distributed applications with parallelism constraints. We first prove that the normalizing constant of the equilibrium state probabilities of a closed model can be reformulated exactly as a multidimensional integral over the unit simplex. This gives as a by-product novel explicit expressions for the multiclass normalizing constant. We then derive a method based on cubature rules to efficiently evaluate the proposed integral form in small and medium-sized models. For large models, we propose novel asymptotic expansions and Monte Carlo sampling methods to efficiently and accurately approximate normalizing constants and likelihoods. We illustrate the resulting accuracy gains in problems involving optimization-based inference.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Ju:2017:HLS, author = "Xiaoen Ju and Hani Jamjoom and Kang G. Shin", title = "{Hieroglyph}: Locally-Sufficient Graph Processing via Compute--Sync--Merge", journal = j-POMACS, volume = "1", number = "1", pages = "9:1--9:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084446", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084446", abstract = "Despite their widespread adoption, large-scale graph processing systems do not fully decouple computation and communication, often yielding suboptimal performance. Locally-sufficient computation-computation that relies only on the graph state local to a computing host-can mitigate the effects of this coupling. 
In this paper, we present Compute--Sync--Merge (CSM), a new programming abstraction that achieves efficient locally-sufficient computation. CSM enforces local sufficiency at the programming abstraction level and enables the activation of vertex-centric computation on all vertex replicas, thus supporting vertex-cut partitioning. We demonstrate the simplicity of expressing several fundamental graph algorithms in CSM. Hieroglyph-our implementation of a graph processing system with CSM support-outperforms state of the art by up to 53x, with a median speedup of 3.5x and an average speedup of 6x across a wide range of datasets.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Chang:2017:URV, author = "Kevin K. Chang and A. Giray Ya{\u{g}}l{\i}k{\c{c}}{\i} and Saugata Ghose and Aditya Agrawal and Niladrish Chatterjee and Abhijith Kashyap and Donghyuk Lee and Mike O'Connor and Hasan Hassan and Onur Mutlu", title = "Understanding Reduced-Voltage Operation in Modern {DRAM} Devices: Experimental Characterization, Analysis, and Mechanisms", journal = j-POMACS, volume = "1", number = "1", pages = "10:1--10:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084447", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084447", abstract = "The energy consumption of DRAM is a critical concern in modern computing systems. Improvements in manufacturing process technology have allowed DRAM vendors to lower the DRAM supply voltage conservatively, which reduces some of the DRAM energy consumption. We would like to reduce the DRAM supply voltage more aggressively, to further reduce energy. 
Aggressive supply voltage reduction requires a thorough understanding of the effect voltage scaling has on DRAM access latency and DRAM reliability. In this paper, we take a comprehensive approach to understanding and exploiting the latency and reliability characteristics of modern DRAM when the supply voltage is lowered below the nominal voltage level specified by DRAM standards. Using an FPGA-based testing platform, we perform an experimental study of 124 real DDR3L (low-voltage) DRAM chips manufactured recently by three major DRAM vendors. We find that reducing the supply voltage below a certain point introduces bit errors in the data, and we comprehensively characterize the behavior of these errors. We discover that these errors can be avoided by increasing the latency of three major DRAM operations (activation, restoration, and precharge). We perform detailed DRAM circuit simulations to validate and explain our experimental findings. We also characterize the various relationships between reduced supply voltage and error locations, stored data patterns, DRAM temperature, and data retention. Based on our observations, we propose a new DRAM energy reduction mechanism, called Voltron. The key idea of Voltron is to use a performance model to determine by how much we can reduce the supply voltage without introducing errors and without exceeding a user-specified threshold for performance loss. Our evaluations show that Voltron reduces the average DRAM and system energy consumption by 10.5\% and 7.3\%, respectively, while limiting the average system performance loss to only 1.8\%, for a variety of memory-intensive quad-core workloads. 
We also show that Voltron significantly outperforms prior dynamic voltage and frequency scaling mechanisms for DRAM.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Wang:2017:UBI, author = "Cheng Wang and Bhuvan Urgaonkar and Neda Nasiriani and George Kesidis", title = "Using Burstable Instances in the Public Cloud: Why, When and How?", journal = j-POMACS, volume = "1", number = "1", pages = "11:1--11:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084448", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib; http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "http://dl.acm.org/citation.cfm?id=3084448", abstract = "Amazon EC2 and Google Compute Engine (GCE) have recently introduced a new class of virtual machines called 'burstable' instances that are cheaper than even the smallest traditional/regular instances. These lower prices come with reduced average capacity and increased variance. Using measurements from both EC2 and GCE, we identify key idiosyncrasies of resource capacity dynamism for burstable instances that set them apart from other instance types. Most importantly, certain resources for these instances appear to be regulated by deterministic token bucket like mechanisms. We find widely different types of disclosures by providers of the parameters governing these regulation mechanisms: full disclosure (e.g., CPU capacity for EC2 t2 instances), partial disclosure (e.g., CPU capacity and remote disk IO bandwidth for GCE shared-core instances), or no disclosure (network bandwidth for EC2 t2 instances). A tenant modeling these variations as random phenomena (as some recent work suggests) might make sub-optimal procurement and operation decisions. 
We present modeling techniques for a tenant to infer the properties of these regulation mechanisms via simple offline measurements. We also present two case studies of how certain memcached workloads might benefit from our modeling when operating on EC2 by: (i) augmenting cheap but low availability in-memory storage offered by spot instances with backup of popular content on burstable instances, and (ii) temporal multiplexing of multiple burstable instances to achieve the CPU or network bandwidth (and thereby throughput) equivalent of a more expensive regular EC2 instance.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Ying:2017:SMM, author = "Lei Ying", title = "{Stein}'s Method for Mean Field Approximations in Light and Heavy Traffic Regimes", journal = j-POMACS, volume = "1", number = "1", pages = "12:1--12:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084449", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084449", abstract = "Mean-field analysis is an analytical method for understanding large-scale stochastic systems such as large-scale data centers and communication networks. The idea is to approximate the stationary distribution of a large-scale stochastic system using the equilibrium point (called the mean-field limit) of a dynamical system (called the mean-field model). This approximation is often justified by proving the weak convergence of stationary distributions to its mean-field limit. Most existing mean-field models concerned the light-traffic regime where the load of the system, denote by $ \rho $, is strictly less than one and is independent of the size of the system. 
This is because a traditional mean-field model represents the limit of the corresponding stochastic system. Therefore, the load of the mean-field model is $ \rho = \lim_{N \to \infty} \rho (N) $, where $ \rho (N) $ is the load of the stochastic system of size $N$. Now if $ \rho (N) \to 1$ as $ N \to \infty $ (i.e., in the heavy-traffic regime), then $ \rho = 1$. For most systems, the mean-field limits when $ \rho = 1$ are trivial and meaningless. To overcome this difficulty of traditional mean-field models, this paper takes a different point of view on mean-field models. Instead of regarding a mean-field model as the limiting system of large-scale stochastic system, it views the equilibrium point of the mean-field model, called a mean-field solution, simply as an approximation of the stationary distribution of the finite-size system. Therefore both mean-field models and solutions can be functions of $N$. This paper first outlines an analytical method to bound the approximation error based on Stein's method and the perturbation theory. We further present two examples: the M/M/N queueing system and the supermarket model under the power-of-two-choices algorithm. For both applications, the method enables us to characterize the system performance under a broad range of traffic loads. For the supermarket model, this is the first paper that rigorously quantifies the steady-state performance of the power-of-two-choices in the heavy-traffic regime. 
These results in the heavy-traffic regime cannot be obtained using the traditional mean-field analysis and the interchange of the limits.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Wang:2017:SGN, author = "Sinong Wang and Ness Shroff", title = "Security Game with Non-additive Utilities and Multiple Attacker Resources", journal = j-POMACS, volume = "1", number = "1", pages = "13:1--13:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084450", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084450", abstract = "There has been significant interest in studying security games for modeling the interplay of attacks and defenses on various systems involving critical infrastructure, financial system security, political campaigns, and civil safeguarding. However, existing security game models typically either assume additive utility functions, or that the attacker can attack only one target. Such assumptions lead to tractable analysis, but miss key inherent dependencies that exist among different targets in current complex networks. In this paper, we generalize the classical security game models to allow for non-additive utility functions. We also allow attackers to be able to attack multiple targets. We examine such a general security game from a theoretical perspective and provide a unified view. In particular, we show that each security game is equivalent to a combinatorial optimization problem over a set system $ \epsilon $, which consists of defender's pure strategy space. The key technique we use is based on the transformation, projection of a polytope, and the ellipsoid method. 
This work settles several open questions in security game domain and significantly extends the state-of-the-art of both the polynomial solvable and NP-hard class of the security game.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Li:2017:SYE, author = "Lingda Li and Robel Geda and Ari B. Hayes and Yanhao Chen and Pranav Chaudhari and Eddy Z. Zhang and Mario Szegedy", title = "A Simple Yet Effective Balanced Edge Partition Model for Parallel Computing", journal = j-POMACS, volume = "1", number = "1", pages = "14:1--14:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084451", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084451", abstract = "Graph edge partition models have recently become an appealing alternative to graph vertex partition models for distributed computing due to their flexibility in balancing loads and their performance in reducing communication cost [6, 16]. In this paper, we propose a simple yet effective graph edge partitioning algorithm. In practice, our algorithm provides good partition quality (and better than similar state-of-the-art edge partition approaches, at least for power-law graphs) while maintaining low partition overhead. In theory, previous work [6] showed that an approximation guarantee of $ O(d_{\rm max} \sqrt {\log n \log k}) $ apply to the graphs with $ m = \Omega (k^2) $ edges ($k$ is the number of partitions). We further rigorously proved that this approximation guarantee hold for all graphs. We show how our edge partition model can be applied to parallel computing. 
We draw our example from GPU program locality enhancement and demonstrate that the graph edge partition model does not only apply to distributed computing with many computer nodes, but also to parallel computing in a single computer node with a many-core processor.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Zhou:2017:PSM, author = "You Zhou and Yian Zhou and Min Chen and Shigang Chen", title = "Persistent Spread Measurement for Big Network Data Based on Register Intersection", journal = j-POMACS, volume = "1", number = "1", pages = "15:1--15:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084452", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084452", abstract = "Persistent spread measurement is to count the number of distinct elements that persist in each network flow for predefined time periods. It has many practical applications, including detecting long-term stealthy network activities in the background of normal-user activities, such as stealthy DDoS attack, stealthy network scan, or faked network trend, which cannot be detected by traditional flow cardinality measurement. With big network data, one challenge is to measure the persistent spreads of a massive number of flows without incurring too much memory overhead as such measurement may be performed at the line speed by network processors with fast but small on-chip memory. We propose a highly compact Virtual Intersection HyperLogLog (VI-HLL) architecture for this purpose. It achieves far better memory efficiency than the best prior work of V-Bitmap, and in the meantime drastically extends the measurement range. 
Theoretical analysis and extensive experiments demonstrate that VI-HLL provides good measurement accuracy even in very tight memory space of less than 1 bit per flow.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Bondorf:2017:QCD, author = "Steffen Bondorf and Paul Nikolaus and Jens B. Schmitt", title = "Quality and Cost of Deterministic Network Calculus: Design and Evaluation of an Accurate and Fast Analysis", journal = j-POMACS, volume = "1", number = "1", pages = "16:1--16:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084453", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084453", abstract = "Networks are integral parts of modern safety-critical systems and certification demands the provision of guarantees for data transmissions. Deterministic Network Calculus (DNC) can compute a worst-case bound on a data flow's end-to-end delay. Accuracy of DNC results has been improved steadily, resulting in two DNC branches: the classical algebraic analysis and the more recent optimization-based analysis. The optimization-based branch provides a theoretical solution for tight bounds. Its computational cost grows, however, (possibly super-)exponentially with the network size. Consequently, a heuristic optimization formulation trading accuracy against computational costs was proposed. In this article, we challenge optimization-based DNC with a new algebraic DNC algorithm. We show that: no current optimization formulation scales well with the network size and algebraic DNC can be considerably improved in both aspects, accuracy and computational cost. 
To that end, we contribute a novel DNC algorithm that transfers the optimization's search for best attainable delay bounds to algebraic DNC. It achieves a high degree of accuracy and our novel efficiency improvements reduce the cost of the analysis dramatically. In extensive numerical experiments, we observe that our delay bounds deviate from the optimization-based ones by only 1.142\% on average while computation times simultaneously decrease by several orders of magnitude.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Gast:2017:EVE, author = "Nicolas Gast", title = "Expected Values Estimated via Mean-Field Approximation are {$ 1 / N $}-Accurate", journal = j-POMACS, volume = "1", number = "1", pages = "17:1--17:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084454", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084454", abstract = "Mean-field approximation is a powerful tool to study large-scale stochastic systems such as data-centers --- one example being the famous power of two-choice paradigm. It is shown in the literature that under quite general conditions, the empirical measure of a system of $N$ interacting objects converges at rate $ O(1 / \sqrt {N}) $ to a deterministic dynamical system, called its mean-field approximation. In this paper, we revisit the accuracy of mean-field approximation by focusing on expected values. We show that, under almost the same general conditions, the expectation of any performance functional converges at rate $ O(1 / N) $ to its mean-field approximation. Our result applies for finite and infinite-dimensional mean-field models. 
We also develop a new perturbation theory argument that shows that the result holds for the stationary regime if the dynamical system is asymptotically exponentially stable. We provide numerical experiments that demonstrate that this rate of convergence is tight and that illustrate the necessity of our conditions. As an example, we apply our result to the classical two-choice model. By combining our theory with numerical experiments, we claim that, as the load $ \rho $ goes to 1, the average queue length of a two-choice system with $N$ servers is $ \log_2 1 / (1 - \rho) + 1 / (2 N (1 - \rho)) + O(1 / N^2)$.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Wang:2017:CMP, author = "Brandon Wang and Xiaoye Li and Leandro P. de Aguiar and Daniel S. Menasche and Zubair Shafiq", title = "Characterizing and Modeling Patching Practices of Industrial Control Systems", journal = j-POMACS, volume = "1", number = "1", pages = "18:1--18:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084455", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084455", abstract = "Industrial Control Systems (ICS) are widely deployed in mission critical infrastructures such as manufacturing, energy, and transportation. The mission critical nature of ICS devices poses important security challenges for ICS vendors and asset owners. In particular, the patching of ICS devices is usually deferred to scheduled production outages so as to prevent potential operational disruption of critical systems. 
Unfortunately, anecdotal evidence suggests that ICS devices are riddled with security vulnerabilities that are not patched in a timely manner, which leaves them vulnerable to exploitation by hackers, nation states, and hacktivist organizations. In this paper, we present the results from our longitudinal measurement and characterization study of ICS patching behavior. Our study is based on IP scan data collected from Shodan over the duration of three years for more than 500 known industrial ICS protocols and products. Our longitudinal measurements reveal the impact of vulnerability disclosures on ICS patching. Our analysis of more than 100 thousand Internet-exposed ICS devices reveals that about 50\% upgrade to newer patched versions within 60 days of a vulnerability disclosure. Based on our measurement and analysis, we further propose a variation of the Bass model to forecast the patching behavior of ICS devices. The evaluation shows that our proposed models have comparable prediction accuracy when contrasted against traditional ARIMA timeseries forecasting models, while requiring less parameters and being amenable to direct physical interpretation.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Formby:2017:CSP, author = "David Formby and Anwar Walid and Raheem Beyah", title = "A Case Study in Power Substation Network Dynamics", journal = j-POMACS, volume = "1", number = "1", pages = "19:1--19:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084456", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084456", abstract = "The modern world is becoming increasingly dependent on computing and communication technology to function, but 
unfortunately its application and impact on areas such as critical infrastructure and industrial control system (ICS) networks remains to be thoroughly studied. Significant research has been conducted to address the myriad security concerns in these areas, but they are virtually all based on artificial testbeds or simulations designed on assumptions about their behavior either from knowledge of traditional IT networking or from basic principles of ICS operation. In this work, we provide the most detailed characterization of an example ICS to date in order to determine if these common assumptions hold true. A live power distribution substation is observed over the course of two and a half years to measure its behavior and evolution over time. Then, a horizontal study is conducted that compared this behavior with three other substations from the same company. Although most predictions were found to be correct, some unexpected behavior was observed that highlights the fundamental differences between ICS and IT networks including round trip times dominated by processing speed as opposed to network delay, several well known TCP features being largely irrelevant, and surprisingly large jitter from devices running real-time operating systems. The impact of these observations is discussed in terms of generality to other embedded networks, network security applications, and the suitability of the TCP protocol for this environment.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Nguyen:2017:OIC, author = "Hung T. Nguyen and Tri P. Nguyen and Tam N. Vu and Thang N. 
Dinh", title = "Outward Influence and Cascade Size Estimation in Billion-scale Networks", journal = j-POMACS, volume = "1", number = "1", pages = "20:1--20:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084457", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084457", abstract = "Estimating cascade size and nodes' influence is a fundamental task in social, technological, and biological networks. Yet this task is extremely challenging due to the sheer size and the structural heterogeneity of networks. We investigate a new influence measure, termed outward influence (OI), defined as the (expected) number of nodes that a subset of nodes S will activate, excluding the nodes in S. Thus, OI equals, the de facto standard measure, influence spread of S minus | S |. OI is not only more informative for nodes with small influence, but also, critical in designing new effective sampling and statistical estimation methods. Based on OI, we propose SIEA/SOIEA, novel methods to estimate influence spread/outward influence at scale and with rigorous theoretical guarantees. The proposed methods are built on two novel components (1) IICP an important sampling method for outward influence; and (2) RSA, a robust mean estimation method that minimize the number of samples through analyzing variance and range of random variables. Compared to the state-of-the art for influence estimation, SIEA is $ \Omega (\log^4 n) $ times faster in theory and up to several orders of magnitude faster in practice. For the first time, influence of nodes in the networks of billions of edges can be estimated with high accuracy within a few minutes. Our comprehensive experiments on real-world networks also give evidence against the popular practice of using a fixed number, e.g. 
10K or 20K, of samples to compute the 'ground truth' for influence spread.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Choi:2017:EDL, author = "Wonil Choi and Mohammad Arjomand and Myoungsoo Jung and Mahmut Kandemir", title = "Exploiting Data Longevity for Enhancing the Lifetime of Flash-based Storage Class Memory", journal = j-POMACS, volume = "1", number = "1", pages = "21:1--21:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084458", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084458", abstract = "Storage-class memory (SCM) combines the benefits of a solid-state memory, such as high-performance and robustness, with the archival capabilities and low cost of conventional hard-disk magnetic storage. Among candidate solid-state nonvolatile memory technologies that could potentially be used to construct SCM, flash memory is a well-established technology and have been widely used in commercially available SCM incarnations. Flash-based SCM enables much better tradeoffs between performance, space and power than disk-based systems. However, write endurance is a significant challenge for a flash-based SCM (each act of writing a bit may slightly damage a cell, so one flash cell can be written $ 10^4 $--$ 10^5 $ times, depending on the flash technology, before it becomes unusable). This is a well-documented problem and has received a lot of attention by manufactures that are using some combination of write reduction and wear-leveling techniques for achieving longer lifetime. 
In an effort to improve flash lifetime, first, by quantifying data longevity in an SCM, we show that a majority of the data stored in a solid-state SCM do not require long retention times provided by flash memory (i.e., up to 10 years in modern devices); second, by exploiting retention time relaxation, we propose a novel mechanism, called Dense-SLC (D-SLC), which enables us perform multiple writes into a cell during each erase cycle for lifetime extension; and finally, we discuss the required changes in the flash management software (FTL) in order to use D-SLC mechanism for extending the lifetime of the solid-state part of an SCM. Using an extensive simulation-based analysis of an SLC flash-based SCM, we demonstrate that D-SLC is able to significantly improve device lifetime (between 5.1X and 8.6X) with no performance overhead and also very small changes at the FTL software.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Venkatakrishnan:2017:DRB, author = "Shaileshh Bojja Venkatakrishnan and Giulia Fanti and Pramod Viswanath", title = "{Dandelion}: Redesigning the {Bitcoin} Network for Anonymity", journal = j-POMACS, volume = "1", number = "1", pages = "22:1--22:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084459", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084459", abstract = "Bitcoin and other cryptocurrencies have surged in popularity over the last decade. Although Bitcoin does not claim to provide anonymity for its users, it enjoys a public perception of being a privacy preserving financial system. 
In reality, cryptocurrencies publish users' entire transaction histories in plaintext, albeit under a pseudonym; this is required for transaction validation. Therefore, if a user's pseudonym can be linked to their human identity, the privacy fallout can be significant. Recently, researchers have demonstrated deanonymization attacks that exploit weaknesses in the Bitcoin network's peer-to-peer (P2P) networking protocols. In particular, the P2P network currently forwards content in a structured way that allows observers to deanonymize users. In this work, we redesign the P2P network from first principles with the goal of providing strong, provable anonymity guarantees. We propose a simple networking policy called Dandelion which provides quasi-optimal, network-wide anonymity, with minimal cost to the network's utility. We also discuss practical implementation challenges and propose heuristic solutions.", acknowledgement = ack-nhfb, articleno = "22", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Zhang:2017:OPP, author = "Zijun Zhang and Zongpeng Li and Chuan Wu", title = "Optimal Posted Prices for Online Cloud Resource Allocation", journal = j-POMACS, volume = "1", number = "1", pages = "23:1--23:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084460", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084460", abstract = "We study online resource allocation in a cloud computing platform through posted pricing: The cloud provider publishes a unit price for each resource type, which may vary over time; upon arrival at the cloud system, a cloud user either takes the current prices, renting resources to execute its job, or refuses the prices without running its job there. 
We design pricing functions based on current resource utilization ratios, in a wide array of demand-supply relationships and resource occupation durations, and prove worst-case competitive ratios in social welfare. In the basic case of a single-type, non-recycled resource (allocated resources are not later released for reuse), we prove that our pricing function design is optimal, in that it achieves the smallest competitive ratio among all possible pricing functions. Insights obtained from the basic case are then used to generalize the pricing functions to more realistic cloud systems with multiple types of resources, where a job occupies allocated resources for a number of time slots till completion, upon which time the resources are returned to the cloud resource pool.", acknowledgement = ack-nhfb, articleno = "23", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Sun:2017:ASM, author = "Wen Sun and Veronique Simon and Sebastien Monnet and Philippe Robert and Pierre Sens", title = "Analysis of a Stochastic Model of Replication in Large Distributed Storage Systems: A Mean-Field Approach", journal = j-POMACS, volume = "1", number = "1", pages = "24:1--24:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084462", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084462", abstract = "Distributed storage systems such as Hadoop File System or Google File System (GFS) ensure data availability and durability using replication. Persistence is achieved by replicating the same data block on several nodes, and ensuring that a minimum number of copies are available on the system at any time. 
Whenever the contents of a node are lost, for instance due to a hard disk crash, the system regenerates the data blocks stored before the failure by transferring them from the remaining replicas. This paper is focused on the analysis of the efficiency of replication mechanism that determines the location of the copies of a given file at some server. The variability of the loads of the nodes of the network is investigated for several policies. Three replication mechanisms are tested against simulations in the context of a real implementation of such a system: Random, Least Loaded and Power of Choice. The simulations show that some of these policies may lead to quite unbalanced situations: if $ \beta $ is the average number of copies per node it turns out that, at equilibrium, the load of the nodes may exhibit a high variability. It is shown in this paper that a simple variant of a power of choice type algorithm has a striking effect on the loads of the nodes: at equilibrium, the distribution of the load of a node has a bounded support, most of nodes have a load less than $ 2 \beta $ which is an interesting property for the design of the storage space of these systems. Stochastic models are introduced and investigated to explain this interesting phenomenon.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Mukherjee:2017:OSE, author = "Debankur Mukherjee and Souvik Dhara and Sem C. Borst and Johan S. H. 
van Leeuwaarden", title = "Optimal Service Elasticity in Large-Scale Distributed Systems", journal = j-POMACS, volume = "1", number = "1", pages = "25:1--25:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084463", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084463", abstract = "A fundamental challenge in large-scale cloud networks and data centers is to achieve highly efficient server utilization and limit energy consumption, while providing excellent user-perceived performance in the presence of uncertain and time-varying demand patterns. Auto-scaling provides a popular paradigm for automatically adjusting service capacity in response to demand while meeting performance targets, and queue-driven auto-scaling techniques have been widely investigated in the literature. In typical data center architectures and cloud environments however, no centralized queue is maintained, and load balancing algorithms immediately distribute incoming tasks among parallel queues. In these distributed settings with vast numbers of servers, centralized queue-driven auto-scaling techniques involve a substantial communication overhead and major implementation burden, or may not even be viable at all. Motivated by the above issues, we propose a joint auto-scaling and load balancing scheme which does not require any global queue length information or explicit knowledge of system parameters, and yet provides provably near-optimal service elasticity. We establish the fluid-level dynamics for the proposed scheme in a regime where the total traffic volume and nominal service capacity grow large in proportion. The fluid-limit results show that the proposed scheme achieves asymptotic optimality in terms of user-perceived delay performance as well as energy consumption. 
Specifically, we prove that both the waiting time of tasks and the relative energy portion consumed by idle servers vanish in the limit. At the same time, the proposed scheme operates in a distributed fashion and involves only constant communication overhead per task, thus ensuring scalability in massive data center operations. Extensive simulation experiments corroborate the fluid-limit results, and demonstrate that the proposed scheme can match the user performance and energy consumption of state-of-the-art approaches that do take full advantage of a centralized queue.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Lee:2017:DIL, author = "Donghyuk Lee and Samira Khan and Lavanya Subramanian and Saugata Ghose and Rachata Ausavarungnirun and Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu", title = "Design-Induced Latency Variation in Modern {DRAM} Chips: Characterization, Analysis, and Latency Reduction Mechanisms", journal = j-POMACS, volume = "1", number = "1", pages = "26:1--26:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084464", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084464", abstract = "Variation has been shown to exist across the cells within a modern DRAM chip. Prior work has studied and exploited several forms of variation, such as manufacturing-process- or temperature-induced variation. We empirically demonstrate a new form of variation that exists within a real DRAM chip, induced by the design and placement of different components in the DRAM chip: different regions in DRAM, based on their relative distances from the peripheral structures, require different minimum access latencies for reliable operation. 
In particular, we show that in most real DRAM chips, cells closer to the peripheral structures can be accessed much faster than cells that are farther. We call this phenomenon design-induced variation in DRAM. Our goals are to (i) understand design-induced variation that exists in real, state-of-the-art DRAM chips, (ii) exploit it to develop low-cost mechanisms that can dynamically find and use the lowest latency at which to operate a DRAM chip reliably, and, thus, (iii) improve overall system performance while ensuring reliable system operation. To this end, we first experimentally demonstrate and analyze design-induced variation in modern DRAM devices by testing and characterizing 96 DIMMs (768 DRAM chips). Our characterization identifies DRAM regions that are vulnerable to errors, if operated at lower latency, and finds consistency in their locations across a given DRAM chip generation, due to design-induced variation. Based on our extensive experimental analysis, we develop two mechanisms that reliably reduce DRAM latency. First, DIVA Profiling uses runtime profiling to dynamically identify the lowest DRAM latency that does not introduce failures. DIVA Profiling exploits design-induced variation and periodically profiles only the vulnerable regions to determine the lowest DRAM latency at low cost. It is the first mechanism to dynamically determine the lowest latency that can be used to operate DRAM reliably. DIVA Profiling reduces the latency of read/write requests by 35.1\% / 57.8\%, respectively, at 55$^\circ $C. Our second mechanism, DIVA Shuffling, shuffles data such that values stored in vulnerable regions are mapped to multiple error-correcting code (ECC) codewords. As a result, DIVA Shuffling can correct 26\% more multi-bit errors than conventional ECC. 
Combined together, our two mechanisms reduce read/write latency by 40.0\%/60.5\%, which translates to an overall system performance improvement of 14.7\%/13.7\%/13.8\% (in 2-/4-/8-core systems) across a variety of workloads, while ensuring reliable operation.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Avrachenkov:2017:LCA, author = "Konstantin Avrachenkov and Jasper Goseling and Berksan Serbetci", title = "A Low-Complexity Approach to Distributed Cooperative Caching with Geographic Constraints", journal = j-POMACS, volume = "1", number = "1", pages = "27:1--27:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084465", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "http://www.math.utah.edu/pub/tex/bib/pomacs.bib", URL = "http://dl.acm.org/citation.cfm?id=3084465", abstract = "We consider caching in cellular networks in which each base station is equipped with a cache that can store a limited number of files. The popularity of the files is known and the goal is to place files in the caches such that the probability that a user at an arbitrary location in the plane will find the file that she requires in one of the covering caches is maximized. We develop distributed asynchronous algorithms for deciding which contents to store in which cache. Such cooperative algorithms require communication only between caches with overlapping coverage areas and can operate in asynchronous manner. The development of the algorithms is principally based on an observation that the problem can be viewed as a potential game. Our basic algorithm is derived from the best response dynamics. 
We demonstrate that the complexity of each best response step is independent of the number of files, linear in the cache capacity and linear in the maximum number of base stations that cover a certain area. Then, we show that the overall algorithm complexity for a discrete cache placement is polynomial in both network size and catalog size. In practical examples, the algorithm converges in just a few iterations. Also, in most cases of interest, the basic algorithm finds the best Nash equilibrium corresponding to the global optimum. We provide two extensions of our basic algorithm based on stochastic and deterministic simulated annealing which find the global optimum. Finally, we demonstrate the hit probability evolution on real and synthetic networks numerically and show that our distributed caching algorithm performs significantly better than storing the most popular content, probabilistic content placement policy and Multi-LRU caching policies.", acknowledgement = ack-nhfb, articleno = "27", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", }