Valid HTML 4.0! Valid CSS!
%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.00",
%%%     date            = "16 June 2017",
%%%     time            = "09:35:37 MST",
%%%     filename        = "pomacs.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "22676 1549 8942 82901",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; Proceedings of the ACM
%%%                        on Measurement and Analysis of Computing
%%%                        Systems (POMACS)",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        Proceedings of the ACM on Measurement and
%%%                        Analysis of Computing Systems (POMACS) (CODEN
%%%                        ????, ISSN 2476-1249).  The journal appears
%%%                        annually, and publication began with volume
%%%                        1, number 1, in June 2017.
%%%
%%%                        At version 1.00, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2017 (  27)
%%%
%%%                             Article:         27
%%%
%%%                             Total entries:   27
%%%
%%%                        The journal Web pages can be found at:
%%%
%%%                            http://pomacs.acm.org/
%%%                            http://pomacs.acm.org/archive-toc.cfm
%%%
%%%                        The journal table of contents page is at:
%%%
%%%                            http://dl.acm.org/pub.cfm?id=J1567
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
@Preamble{"\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-POMACS                = "Proceedings of the ACM on Measurement and
                                   Analysis of Computing Systems (POMACS)"}

%%% ====================================================================
%%% Bibliography entries:
@Article{Chaintreau:2017:E,
  author =       "Augustin Chaintreau and Leana Golubchik and Zhi-Li
                 Zhang",
  title =        "Editorial",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105875",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3105875",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Gibbens:2017:HND,
  author =       "Mathias Gibbens and Chris Gniady and Lei Ye and
                 Beichuan Zhang",
  title =        "{Hadoop} on Named Data Networking: Experience and
                 Results",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084439",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084439",
  abstract =     "The Named Data Networking (NDN) architecture retrieves
                 content by names rather than connecting to specific
                 hosts. It provides benefits such as highly efficient
                 and resilient content distribution, which fit well to
                 data-intensive distributed computing. This paper
                 presents and discusses our experience in modifying
                 Apache Hadoop, a popular MapReduce framework, to
                 operate on an NDN network. Through this
                 first-of-its-kind implementation process, we
                 demonstrate the feasibility of running an existing,
                 large, and complex piece of distributed software
                 commonly seen in data centers over NDN. We show
                 advantages such as simplified network code and reduced
                 network traffic which are beneficial in a data center
                 environment. There are also challenges faced by NDN,
                 that are being addressed by the community, which can be
                 magnified under data center traffic. Through detailed
                 evaluation, we show a reduction of 16\% for overall
                 data transmission between Hadoop nodes while writing
                 data with default replication settings. Preliminary
                 results also show promise for in-network caching of
                 repeated reads in distributed applications. We also
                 show that overall performance is currently slower under
                 NDN, and we identify challenges and opportunities for
                 further NDN improvements.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Gong:2017:QPS,
  author =       "Long Gong and Paul Tune and Liang Liu and Sen Yang and
                 Jun (Jim) Xu",
  title =        "Queue-Proportional Sampling: A Better Approach to
                 Crossbar Scheduling for Input-Queued Switches",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084440",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084440",
  abstract =     "Most present day switching systems, in Internet
                 routers and data-center switches, employ a single
                 input-queued crossbar to interconnect input ports with
                 output ports. Such switches need to compute a matching,
                 between input and output ports, for each switching
                 cycle (time slot). The main challenge in designing such
                 matching algorithms is to deal with the unfortunate
                 tradeoff between the quality of the computed matching
                 and the computational complexity of the algorithm. In
                 this paper, we propose a general approach that can
                 significantly boost the performance of both SERENA and
                 iSLIP, yet incurs only O(1) additional computational
                 complexity at each input/output port. Our approach is a
                 novel proposing strategy, called Queue-Proportional
                 Sampling (QPS), that generates an excellent starter
                 matching. We show, through rigorous simulations, that
                 when starting with this starter matching, iSLIP and
                 SERENA can output much better final matching decisions,
                 as measured by the resulting throughput and delay
                 performance, than they otherwise can.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Quach:2017:ILT,
  author =       "Alan Quach and Zhongjie Wang and Zhiyun Qian",
  title =        "Investigation of the 2016 {Linux TCP} Stack
                 Vulnerability at Scale",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084441",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084441",
  abstract =     "To combat blind in-window attacks against TCP, changes
                 proposed in RFC 5961 have been implemented by Linux
                 since late 2012. While successfully eliminating the old
                 vulnerabilities, the new TCP implementation was
                 reported in August 2016 to have introduced a subtle yet
                 serious security flaw. Assigned CVE-2016-5696, the flaw
                 exploits the challenge ACK rate limiting feature that
                 could allow an off-path attacker to infer the
                 presence/absence of a TCP connection between two
                 arbitrary hosts, terminate such a connection, and even
                 inject payload into an unsecured TCP connection. In
                 this work, we perform a comprehensive measurement of
                 the impact of the new vulnerability. This includes (1)
                 tracking the vulnerable Internet servers, (2)
                 monitoring the patch behavior over time, (3) picturing
                 the overall security status of TCP stacks at scale.
                 Towards this goal, we design a scalable measurement
                 methodology to scan the Alexa top 1 million websites
                 for almost 6 months. We also present how notifications
                 impact the patching behavior, and compare the result
                 with the Heartbleed and the Debian PRNG vulnerability.
                 The measurement represents a valuable data point in
                 understanding how Internet servers react to serious
                 security flaws in the operating system kernel.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Sharma:2017:PDR,
  author =       "Prateek Sharma and David Irwin and Prashant Shenoy",
  title =        "Portfolio-driven Resource Management for Transient
                 Cloud Servers",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084442",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084442",
  abstract =     "Cloud providers have begun to offer their surplus
                 capacity in the form of low-cost transient servers,
                 which can be revoked unilaterally at any time. While
                 the low cost of transient servers makes them attractive
                 for a wide range of applications, such as data
                 processing and scientific computing, failures due to
                 server revocation can severely degrade application
                 performance. Since different transient server types
                 offer different cost and availability tradeoffs, we
                 present the notion of server portfolios that is based
                 on financial portfolio modeling. Server portfolios
                 enable construction of an 'optimal' mix of severs to
                 meet an application's sensitivity to cost and
                 revocation risk. We implement model-driven portfolios
                 in a system called ExoSphere, and show how diverse
                 applications can use portfolios and
                 application-specific policies to gracefully handle
                 transient servers. We show that ExoSphere enables
                 widely-used parallel applications such as Spark, MPI,
                 and BOINC to be made transiency-aware with modest
                 effort. Our experiments show that allowing the
                 applications to use suitable transiency-aware policies,
                 ExoSphere is able to achieve 80\% cost savings when
                 compared to on-demand servers and greatly reduces
                 revocation risk compared to existing approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Cao:2017:DEC,
  author =       "Yi Cao and Javad Nejati and Muhammad Wajahat and Aruna
                 Balasubramanian and Anshul Gandhi",
  title =        "Deconstructing the Energy Consumption of the Mobile
                 Page Load",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084443",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084443",
  abstract =     "Modeling the energy consumption of applications on
                 mobile devices is an important topic that has received
                 much attention in recent years. However, there has been
                 very little research on modeling the energy consumption
                 of the mobile Web. This is primarily due to the
                 short-lived yet complex page load process that makes it
                 infeasible to rely on coarse-grained resource
                 monitoring for accurate power estimation. We present
                 RECON, a modeling approach that accurately estimates
                 the energy consumption of any Web page load and
                 deconstructs it into the energy contributions of
                 individual page load activities. Our key intuition is
                 to leverage low-level application semantics in addition
                 to coarse-grained resource utilizations for modeling
                 the page load energy consumption. By exploiting
                 fine-grained information about the individual
                 activities that make up the page load, RECON enables
                 fast and accurate energy estimations without requiring
                 complex models. Experiments across 80 Web pages and
                 under four different optimizations show that RECON can
                 estimate the energy consumption for a Web page load
                 with an average error of less than 7\%. Importantly,
                 RECON helps to analyze and explain the energy effects
                 of an optimization on the individual components of Web
                 page loads.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Wang:2017:OTS,
  author =       "Xin Wang and Richard T. B. Ma and Yinlong Xu",
  title =        "On Optimal Two-Sided Pricing of Congested Networks",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084444",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084444",
  abstract =     "Traditionally, Internet Access Providers (APs) only
                 charge end-users for Internet access services; however,
                 to recoup infrastructure costs and increase revenues,
                 some APs have recently adopted two-sided pricing
                 schemes under which both end-users and content
                 providers are charged. Meanwhile, with the rapid growth
                 of traffic, network congestion could seriously degrade
                 user experiences and influence providers' utility. To
                 optimize profit and social welfare, APs and regulators
                 need to design appropriate pricing strategies and
                 regulatory policies that take the effects of network
                 congestion into consideration. In this paper, we model
                 two-sided networks under which users' traffic demands
                 are influenced by exogenous pricing and endogenous
                 congestion parameters and derive the system congestion
                 under an equilibrium. We characterize the structures
                 and sensitivities of profit- and welfare-optimal
                 two-sided pricing schemes and reveal that (1) the
                 elasticity of system throughput plays a crucial role in
                 determining the structures of optimal pricing, (2) the
                 changes of optimal pricing under varying AP's capacity
                 and users' congestion sensitivity are largely driven by
                 the type of data traffic, e.g., text or video, and (3)
                 APs and regulators will be incentivized to shift from
                 one-sided to two-sided pricing when APs' capacities and
                 user demand for video traffic grow. Our results can
                 help APs design optimal two-sided pricing and guide
                 regulators to legislate desirable policies.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Casale:2017:API,
  author =       "Giuliano Casale",
  title =        "Accelerating Performance Inference over Closed Systems
                 by Asymptotic Methods",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084445",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084445",
  abstract =     "Recent years have seen a rapid growth of interest in
                 exploiting monitoring data collected from enterprise
                 applications for automated management and performance
                 analysis. In spite of this trend, even simple
                 performance inference problems involving queueing
                 theoretic formulas often incur computational
                 bottlenecks, for example upon computing likelihoods in
                 models of batch systems. Motivated by this issue, we
                 revisit the solution of multiclass closed queueing
                 networks, which are popular models used to describe
                 batch and distributed applications with parallelism
                 constraints. We first prove that the normalizing
                 constant of the equilibrium state probabilities of a
                 closed model can be reformulated exactly as a
                 multidimensional integral over the unit simplex. This
                 gives as a by-product novel explicit expressions for
                 the multiclass normalizing constant. We then derive a
                 method based on cubature rules to efficiently evaluate
                 the proposed integral form in small and medium-sized
                 models. For large models, we propose novel asymptotic
                 expansions and Monte Carlo sampling methods to
                 efficiently and accurately approximate normalizing
                 constants and likelihoods. We illustrate the resulting
                 accuracy gains in problems involving optimization-based
                 inference.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Ju:2017:HLS,
  author =       "Xiaoen Ju and Hani Jamjoom and Kang G. Shin",
  title =        "{Hieroglyph}: Locally-Sufficient Graph Processing via
                 Compute--Sync--Merge",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084446",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084446",
  abstract =     "Despite their widespread adoption, large-scale graph
                 processing systems do not fully decouple computation
                 and communication, often yielding suboptimal
                 performance. Locally-sufficient computation-computation
                 that relies only on the graph state local to a
                 computing host-can mitigate the effects of this
                 coupling. In this paper, we present
                 Compute--Sync--Merge (CSM), a new programming
                 abstraction that achieves efficient locally-sufficient
                 computation. CSM enforces local sufficiency at the
                 programming abstraction level and enables the
                 activation of vertex-centric computation on all vertex
                 replicas, thus supporting vertex-cut partitioning. We
                 demonstrate the simplicity of expressing several
                 fundamental graph algorithms in CSM. Hieroglyph-our
                 implementation of a graph processing system with CSM
                 support-outperforms state of the art by up to 53x, with
                 a median speedup of 3.5x and an average speedup of 6x
                 across a wide range of datasets.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Chang:2017:URV,
  author =       "Kevin K. Chang and A. Giray Ya{\u{a}}lik{\c{c}}i and
                 Saugata Ghose and Aditya Agrawal and Niladrish
                 Chatterjee and Abhijith Kashyap and Donghyuk Lee and
                 Mike O'Connor and Hasan Hassan and Onur Mutlu",
  title =        "Understanding Reduced-Voltage Operation in Modern
                 {DRAM} Devices: Experimental Characterization,
                 Analysis, and Mechanisms",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084447",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084447",
  abstract =     "The energy consumption of DRAM is a critical concern
                 in modern computing systems. Improvements in
                 manufacturing process technology have allowed DRAM
                 vendors to lower the DRAM supply voltage
                 conservatively, which reduces some of the DRAM energy
                 consumption. We would like to reduce the DRAM supply
                 voltage more aggressively, to further reduce energy.
                 Aggressive supply voltage reduction requires a thorough
                 understanding of the effect voltage scaling has on DRAM
                 access latency and DRAM reliability. In this paper, we
                 take a comprehensive approach to understanding and
                 exploiting the latency and reliability characteristics
                 of modern DRAM when the supply voltage is lowered below
                 the nominal voltage level specified by DRAM standards.
                 Using an FPGA-based testing platform, we perform an
                 experimental study of 124 real DDR3L (low-voltage) DRAM
                 chips manufactured recently by three major DRAM
                 vendors. We find that reducing the supply voltage below
                 a certain point introduces bit errors in the data, and
                 we comprehensively characterize the behavior of these
                 errors. We discover that these errors can be avoided by
                 increasing the latency of three major DRAM operations
                 (activation, restoration, and precharge). We perform
                 detailed DRAM circuit simulations to validate and
                 explain our experimental findings. We also characterize
                 the various relationships between reduced supply
                 voltage and error locations, stored data patterns, DRAM
                 temperature, and data retention. Based on our
                 observations, we propose a new DRAM energy reduction
                 mechanism, called Voltron. The key idea of Voltron is
                 to use a performance model to determine by how much we
                 can reduce the supply voltage without introducing
                 errors and without exceeding a user-specified threshold
                 for performance loss. Our evaluations show that Voltron
                 reduces the average DRAM and system energy consumption
                 by 10.5\% and 7.3\%, respectively, while limiting the
                 average system performance loss to only 1.8\%, for a
                 variety of memory-intensive quad-core workloads. We
                 also show that Voltron significantly outperforms prior
                 dynamic voltage and frequency scaling mechanisms for
                 DRAM.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Wang:2017:UBI,
  author =       "Cheng Wang and Bhuvan Urgaonkar and Neda Nasiriani and
                 George Kesidis",
  title =        "Using Burstable Instances in the Public Cloud: Why,
                 When and How?",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084448",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084448",
  abstract =     "Amazon EC2 and Google Compute Engine (GCE) have
                 recently introduced a new class of virtual machines
                 called 'burstable' instances that are cheaper than even
                 the smallest traditional/regular instances. These lower
                 prices come with reduced average capacity and increased
                 variance. Using measurements from both EC2 and GCE, we
                 identify key idiosyncrasies of resource capacity
                 dynamism for burstable instances that set them apart
                 from other instance types. Most importantly, certain
                 resources for these instances appear to be regulated by
                 deterministic token bucket like mechanisms. We find
                 widely different types of disclosures by providers of
                 the parameters governing these regulation mechanisms:
                 full disclosure (e.g., CPU capacity for EC2 t2
                 instances), partial disclosure (e.g., CPU capacity and
                 remote disk IO bandwidth for GCE shared-core
                 instances), or no disclosure (network bandwidth for EC2
                 t2 instances). A tenant modeling these variations as
                 random phenomena (as some recent work suggests) might
                 make sub-optimal procurement and operation decisions.
                 We present modeling techniques for a tenant to infer
                 the properties of these regulation mechanisms via
                 simple offline measurements. We also present two case
                 studies of how certain memcached workloads might
                 benefit from our modeling when operating on EC2 by: (i)
                 augmenting cheap but low availability in-memory storage
                 offered by spot instances with backup of popular
                 content on burstable instances, and (ii) temporal
                 multiplexing of multiple burstable instances to achieve
                 the CPU or network bandwidth (and thereby throughput)
                 equivalent of a more expensive regular EC2 instance.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Ying:2017:SMM,
  author =       "Lei Ying",
  title =        "{Stein}'s Method for Mean Field Approximations in
                 Light and Heavy Traffic Regimes",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084449",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084449",
  abstract =     "Mean-field analysis is an analytical method for
                 understanding large-scale stochastic systems such as
                 large-scale data centers and communication networks.
                 The idea is to approximate the stationary distribution
                 of a large-scale stochastic system using the
                 equilibrium point (called the mean-field limit) of a
                 dynamical system (called the mean-field model). This
                 approximation is often justified by proving the weak
                 convergence of stationary distributions to its
                 mean-field limit. Most existing mean-field models
                 concerned the light-traffic regime where the load of
                 the system, denote by $ \rho $, is strictly less than
                 one and is independent of the size of the system. This
                 is because a traditional mean-field model represents
                 the limit of the corresponding stochastic system.
                  Therefore, the load of the mean-field model is $ \rho =
                  \lim_{N \to \infty} \rho (N) $, where $ \rho (N) $ is
                 the load of the stochastic system of size $N$. Now if $
                 \rho (N) \to 1$ as $ N \to \infty $ (i.e., in the
                 heavy-traffic regime), then $ \rho = 1$. For most
                 systems, the mean-field limits when $ \rho = 1$ are
                 trivial and meaningless. To overcome this difficulty of
                 traditional mean-field models, this paper takes a
                 different point of view on mean-field models. Instead
                 of regarding a mean-field model as the limiting system
                 of large-scale stochastic system, it views the
                 equilibrium point of the mean-field model, called a
                 mean-field solution, simply as an approximation of the
                 stationary distribution of the finite-size system.
                 Therefore both mean-field models and solutions can be
                 functions of $N$. This paper first outlines an
                 analytical method to bound the approximation error
                 based on Stein's method and the perturbation theory. We
                 further present two examples: the M/M/N queueing system
                 and the supermarket model under the
                 power-of-two-choices algorithm. For both applications,
                 the method enables us to characterize the system
                 performance under a broad range of traffic loads. For
                 the supermarket model, this is the first paper that
                  rigorously quantifies the steady-state performance of
                  the power-of-two-choices in the heavy-traffic regime.
                 These results in the heavy-traffic regime cannot be
                 obtained using the traditional mean-field analysis and
                 the interchange of the limits.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Wang:2017:SGN,
  author =       "Sinong Wang and Ness Shroff",
  title =        "Security Game with Non-additive Utilities and Multiple
                 Attacker Resources",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084450",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084450",
  abstract =     "There has been significant interest in studying
                 security games for modeling the interplay of attacks
                 and defenses on various systems involving critical
                 infrastructure, financial system security, political
                 campaigns, and civil safeguarding. However, existing
                 security game models typically either assume additive
                 utility functions, or that the attacker can attack only
                 one target. Such assumptions lead to tractable
                 analysis, but miss key inherent dependencies that exist
                 among different targets in current complex networks. In
                 this paper, we generalize the classical security game
                 models to allow for non-additive utility functions. We
                 also allow attackers to be able to attack multiple
                 targets. We examine such a general security game from a
                 theoretical perspective and provide a unified view. In
                 particular, we show that each security game is
                 equivalent to a combinatorial optimization problem over
                 a set system $ \epsilon $, which consists of defender's
                 pure strategy space. The key technique we use is based
                 on the transformation, projection of a polytope, and
                 the ellipsoid method. This work settles several open
                 questions in security game domain and significantly
                 extends the state-of-the-art of both the polynomial
                 solvable and NP-hard class of the security game.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Li:2017:SYE,
  author =       "Lingda Li and Robel Geda and Ari B. Hayes and Yanhao
                 Chen and Pranav Chaudhari and Eddy Z. Zhang and Mario
                 Szegedy",
  title =        "A Simple Yet Effective Balanced Edge Partition Model
                 for Parallel Computing",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084451",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084451",
  abstract =     "Graph edge partition models have recently become an
                 appealing alternative to graph vertex partition models
                 for distributed computing due to their flexibility in
                 balancing loads and their performance in reducing
                 communication cost [6, 16]. In this paper, we propose a
                 simple yet effective graph edge partitioning algorithm.
                 In practice, our algorithm provides good partition
                 quality (and better than similar state-of-the-art edge
                 partition approaches, at least for power-law graphs)
                 while maintaining low partition overhead. In theory,
                 previous work [6] showed that an approximation
                 guarantee of $ O(d_{\rm max} \sqrt {\log n \log k}) $
                 apply to the graphs with $ m = \Omega (k^2) $ edges
                 ($k$ is the number of partitions). We further
                 rigorously proved that this approximation guarantee
                 hold for all graphs. We show how our edge partition
                 model can be applied to parallel computing. We draw our
                 example from GPU program locality enhancement and
                 demonstrate that the graph edge partition model does
                 not only apply to distributed computing with many
                 computer nodes, but also to parallel computing in a
                 single computer node with a many-core processor.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Zhou:2017:PSM,
  author =       "You Zhou and Yian Zhou and Min Chen and Shigang
                 Chen",
  title =        "Persistent Spread Measurement for Big Network Data
                 Based on Register Intersection",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084452",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084452",
  abstract =     "Persistent spread measurement is to count the number
                 of distinct elements that persist in each network flow
                 for predefined time periods. It has many practical
                 applications, including detecting long-term stealthy
                 network activities in the background of normal-user
                 activities, such as stealthy DDoS attack, stealthy
                 network scan, or faked network trend, which cannot be
                 detected by traditional flow cardinality measurement.
                 With big network data, one challenge is to measure the
                 persistent spreads of a massive number of flows without
                 incurring too much memory overhead as such measurement
                 may be performed at the line speed by network
                 processors with fast but small on-chip memory. We
                 propose a highly compact Virtual Intersection
                 HyperLogLog (VI-HLL) architecture for this purpose. It
                 achieves far better memory efficiency than the best
                 prior work of V-Bitmap, and in the meantime drastically
                 extends the measurement range. Theoretical analysis and
                 extensive experiments demonstrate that VI-HLL provides
                 good measurement accuracy even in very tight memory
                 space of less than 1 bit per flow.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Bondorf:2017:QCD,
  author =       "Steffen Bondorf and Paul Nikolaus and Jens B.
                 Schmitt",
  title =        "Quality and Cost of Deterministic Network Calculus:
                 Design and Evaluation of an Accurate and Fast
                 Analysis",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084453",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084453",
  abstract =     "Networks are integral parts of modern safety-critical
                 systems and certification demands the provision of
                 guarantees for data transmissions. Deterministic
                 Network Calculus (DNC) can compute a worst-case bound
                 on a data flow's end-to-end delay. Accuracy of DNC
                 results has been improved steadily, resulting in two
                 DNC branches: the classical algebraic analysis and the
                 more recent optimization-based analysis. The
                 optimization-based branch provides a theoretical
                 solution for tight bounds. Its computational cost
                 grows, however, (possibly super-)exponentially with the
                 network size. Consequently, a heuristic optimization
                 formulation trading accuracy against computational
                 costs was proposed. In this article, we challenge
                 optimization-based DNC with a new algebraic DNC
                 algorithm. We show that: no current optimization
                 formulation scales well with the network size and
                 algebraic DNC can be considerably improved in both
                 aspects, accuracy and computational cost. To that end,
                 we contribute a novel DNC algorithm that transfers the
                 optimization's search for best attainable delay bounds
                 to algebraic DNC. It achieves a high degree of accuracy
                 and our novel efficiency improvements reduce the cost
                 of the analysis dramatically. In extensive numerical
                 experiments, we observe that our delay bounds deviate
                 from the optimization-based ones by only 1.142\% on
                 average while computation times simultaneously decrease
                 by several orders of magnitude.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Gast:2017:EVE,
  author =       "Nicolas Gast",
  title =        "Expected Values Estimated via Mean-Field Approximation
                 are {$ 1 / N $}-Accurate",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084454",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084454",
  abstract =     "Mean-field approximation is a powerful tool to study
                 large-scale stochastic systems such as data-centers ---
                 one example being the famous power of two-choice
                 paradigm. It is shown in the literature that under
                 quite general conditions, the empirical measure of a
                  system of $N$ interacting objects converges at rate $
                  O(1 / \sqrt {N}) $ to a deterministic dynamical system,
                 called its mean-field approximation. In this paper, we
                 revisit the accuracy of mean-field approximation by
                 focusing on expected values. We show that, under almost
                 the same general conditions, the expectation of any
                 performance functional converges at rate $ O(1 / N) $
                 to its mean-field approximation. Our result applies for
                 finite and infinite-dimensional mean-field models. We
                 also develop a new perturbation theory argument that
                 shows that the result holds for the stationary regime
                 if the dynamical system is asymptotically exponentially
                 stable. We provide numerical experiments that
                 demonstrate that this rate of convergence is tight and
                 that illustrate the necessity of our conditions. As an
                 example, we apply our result to the classical
                 two-choice model. By combining our theory with
                  numerical experiments, we claim that, as the load $
                  \rho $ goes to 1, the average queue length of a
                  two-choice system with $N$ servers is $ \log_2 (1 / (1
                  - \rho)) + 1 / (2 N (1 - \rho)) + O(1 / N^2)$.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Wang:2017:CMP,
  author =       "Brandon Wang and Xiaoye Li and Leandro P. de Aguiar
                 and Daniel S. Menasche and Zubair Shafiq",
  title =        "Characterizing and Modeling Patching Practices of
                 Industrial Control Systems",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084455",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084455",
  abstract =     "Industrial Control Systems (ICS) are widely deployed
                 in mission critical infrastructures such as
                 manufacturing, energy, and transportation. The mission
                 critical nature of ICS devices poses important security
                 challenges for ICS vendors and asset owners. In
                 particular, the patching of ICS devices is usually
                 deferred to scheduled production outages so as to
                 prevent potential operational disruption of critical
                 systems. Unfortunately, anecdotal evidence suggests
                 that ICS devices are riddled with security
                 vulnerabilities that are not patched in a timely
                 manner, which leaves them vulnerable to exploitation by
                 hackers, nation states, and hacktivist organizations.
                 In this paper, we present the results from our
                 longitudinal measurement and characterization study of
                 ICS patching behavior. Our study is based on IP scan
                 data collected from Shodan over the duration of three
                 years for more than 500 known industrial ICS protocols
                 and products. Our longitudinal measurements reveal the
                 impact of vulnerability disclosures on ICS patching.
                 Our analysis of more than 100 thousand Internet-exposed
                 ICS devices reveals that about 50\% upgrade to newer
                 patched versions within 60 days of a vulnerability
                 disclosure. Based on our measurement and analysis, we
                 further propose a variation of the Bass model to
                 forecast the patching behavior of ICS devices. The
                 evaluation shows that our proposed models have
                 comparable prediction accuracy when contrasted against
                 traditional ARIMA timeseries forecasting models, while
                 requiring less parameters and being amenable to direct
                 physical interpretation.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Formby:2017:CSP,
  author =       "David Formby and Anwar Walid and Raheem Beyah",
  title =        "A Case Study in Power Substation Network Dynamics",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084456",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084456",
  abstract =     "The modern world is becoming increasingly dependent on
                 computing and communication technology to function, but
                 unfortunately its application and impact on areas such
                 as critical infrastructure and industrial control
                 system (ICS) networks remains to be thoroughly studied.
                 Significant research has been conducted to address the
                 myriad security concerns in these areas, but they are
                 virtually all based on artificial testbeds or
                 simulations designed on assumptions about their
                 behavior either from knowledge of traditional IT
                 networking or from basic principles of ICS operation.
                 In this work, we provide the most detailed
                 characterization of an example ICS to date in order to
                 determine if these common assumptions hold true. A live
                 power distribution substation is observed over the
                 course of two and a half years to measure its behavior
                 and evolution over time. Then, a horizontal study is
                 conducted that compared this behavior with three other
                 substations from the same company. Although most
                 predictions were found to be correct, some unexpected
                 behavior was observed that highlights the fundamental
                 differences between ICS and IT networks including round
                 trip times dominated by processing speed as opposed to
                 network delay, several well known TCP features being
                 largely irrelevant, and surprisingly large jitter from
                 devices running real-time operating systems. The impact
                 of these observations is discussed in terms of
                 generality to other embedded networks, network security
                 applications, and the suitability of the TCP protocol
                 for this environment.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Nguyen:2017:OIC,
  author =       "Hung T. Nguyen and Tri P. Nguyen and Tam N. Vu and
                 Thang N. Dinh",
  title =        "Outward Influence and Cascade Size Estimation in
                 Billion-scale Networks",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "20:1--20:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084457",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084457",
  abstract =     "Estimating cascade size and nodes' influence is a
                 fundamental task in social, technological, and
                 biological networks. Yet this task is extremely
                 challenging due to the sheer size and the structural
                 heterogeneity of networks. We investigate a new
                 influence measure, termed outward influence (OI),
                 defined as the (expected) number of nodes that a subset
                 of nodes S will activate, excluding the nodes in S.
                 Thus, OI equals, the de facto standard measure,
                 influence spread of S minus | S |. OI is not only more
                 informative for nodes with small influence, but also,
                 critical in designing new effective sampling and
                 statistical estimation methods. Based on OI, we propose
                 SIEA/SOIEA, novel methods to estimate influence
                 spread/outward influence at scale and with rigorous
                 theoretical guarantees. The proposed methods are built
                 on two novel components (1) IICP an important sampling
                 method for outward influence; and (2) RSA, a robust
                 mean estimation method that minimize the number of
                 samples through analyzing variance and range of random
                  variables. Compared to the state-of-the-art for
                  influence estimation, SIEA is $ \Omega (\log^4 n) $
                 times faster in theory and up to several orders of
                 magnitude faster in practice. For the first time,
                 influence of nodes in the networks of billions of edges
                 can be estimated with high accuracy within a few
                 minutes. Our comprehensive experiments on real-world
                 networks also give evidence against the popular
                 practice of using a fixed number, e.g. 10K or 20K, of
                 samples to compute the 'ground truth' for influence
                 spread.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

@Article{Choi:2017:EDL,
  author =       "Wonil Choi and Mohammad Arjomand and Myoungsoo Jung
                 and Mahmut Kandemir",
  title =        "Exploiting Data Longevity for Enhancing the Lifetime
                 of Flash-based Storage Class Memory",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084458",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084458",
  abstract =     "Storage-class memory (SCM) combines the benefits of a
                 solid-state memory, such as high-performance and
                 robustness, with the archival capabilities and low cost
                 of conventional hard-disk magnetic storage. Among
                 candidate solid-state nonvolatile memory technologies
                 that could potentially be used to construct SCM, flash
                 memory is a well-established technology and have been
                 widely used in commercially available SCM incarnations.
                 Flash-based SCM enables much better tradeoffs between
                 performance, space and power than disk-based systems.
                 However, write endurance is a significant challenge for
                 a flash-based SCM (each act of writing a bit may
                 slightly damage a cell, so one flash cell can be
                  written $ 10^4 $--$ 10^5 $ times, depending on the flash
                 technology, before it becomes unusable). This is a
                 well-documented problem and has received a lot of
                 attention by manufactures that are using some
                 combination of write reduction and wear-leveling
                 techniques for achieving longer lifetime. In an effort
                 to improve flash lifetime, first, by quantifying data
                 longevity in an SCM, we show that a majority of the
                 data stored in a solid-state SCM do not require long
                 retention times provided by flash memory (i.e., up to
                 10 years in modern devices); second, by exploiting
                 retention time relaxation, we propose a novel
                 mechanism, called Dense-SLC (D-SLC), which enables us
                 perform multiple writes into a cell during each erase
                 cycle for lifetime extension; and finally, we discuss
                 the required changes in the flash management software
                 (FTL) in order to use D-SLC mechanism for extending the
                 lifetime of the solid-state part of an SCM. Using an
                 extensive simulation-based analysis of an SLC
                 flash-based SCM, we demonstrate that D-SLC is able to
                 significantly improve device lifetime (between 5.1X and
                 8.6X) with no performance overhead and also very small
                 changes at the FTL software.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 22 (June 2017): redesign of the Bitcoin
%%% peer-to-peer network (``Dandelion'') for anonymity guarantees.
@Article{Venkatakrishnan:2017:DRB,
  author =       "Shaileshh Bojja Venkatakrishnan and Giulia Fanti and
                 Pramod Viswanath",
  title =        "{Dandelion}: Redesigning the {Bitcoin} Network for
                 Anonymity",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084459",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084459",
  abstract =     "Bitcoin and other cryptocurrencies have surged in
                 popularity over the last decade. Although Bitcoin does
                 not claim to provide anonymity for its users, it enjoys
                 a public perception of being a privacy preserving
                 financial system. In reality, cryptocurrencies publish
                 users' entire transaction histories in plaintext,
                 albeit under a pseudonym; this is required for
                 transaction validation. Therefore, if a user's
                 pseudonym can be linked to their human identity, the
                 privacy fallout can be significant. Recently,
                 researchers have demonstrated deanonymization attacks
                 that exploit weaknesses in the Bitcoin network's
                 peer-to-peer (P2P) networking protocols. In particular,
                 the P2P network currently forwards content in a
                 structured way that allows observers to deanonymize
                 users. In this work, we redesign the P2P network from
                 first principles with the goal of providing strong,
                 provable anonymity guarantees. We propose a simple
                 networking policy called Dandelion which provides
                 quasi-optimal, network-wide anonymity, with minimal
                 cost to the network's utility. We also discuss
                 practical implementation challenges and propose
                 heuristic solutions.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 23 (June 2017): posted-price mechanisms with
%%% competitive-ratio guarantees for online cloud resource allocation.
@Article{Zhang:2017:OPP,
  author =       "Zijun Zhang and Zongpeng Li and Chuan Wu",
  title =        "Optimal Posted Prices for Online Cloud Resource
                 Allocation",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084460",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084460",
  abstract =     "We study online resource allocation in a cloud
                 computing platform through posted pricing: The cloud
                 provider publishes a unit price for each resource type,
                 which may vary over time; upon arrival at the cloud
                 system, a cloud user either takes the current prices,
                 renting resources to execute its job, or refuses the
                 prices without running its job there. We design pricing
                 functions based on current resource utilization ratios,
                 in a wide array of demand-supply relationships and
                 resource occupation durations, and prove worst-case
                 competitive ratios in social welfare. In the basic case
                 of a single-type, non-recycled resource (allocated
                 resources are not later released for reuse), we prove
                 that our pricing function design is optimal, in that it
                 achieves the smallest competitive ratio among all
                 possible pricing functions. Insights obtained from the
                 basic case are then used to generalize the pricing
                 functions to more realistic cloud systems with multiple
                 types of resources, where a job occupies allocated
                 resources for a number of time slots till completion,
                 upon which time the resources are returned to the cloud
                 resource pool.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 24 (June 2017).  Fixes relative to the raw
%%% capture: LaTeX accents restored in the French author names
%%% (V{\'e}ronique Simon, S{\'e}bastien Monnet), per this file's
%%% classic-BibTeX 8-bit convention, and the abstract typo ``of a such
%%% a system'' corrected to ``of such a system''.
@Article{Sun:2017:ASM,
  author =       "Wen Sun and V{\'e}ronique Simon and S{\'e}bastien
                 Monnet and Philippe Robert and Pierre Sens",
  title =        "Analysis of a Stochastic Model of Replication in Large
                 Distributed Storage Systems: A Mean-Field Approach",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084462",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084462",
  abstract =     "Distributed storage systems such as Hadoop File System
                 or Google File System (GFS) ensure data availability
                 and durability using replication. Persistence is
                 achieved by replicating the same data block on several
                 nodes, and ensuring that a minimum number of copies are
                 available on the system at any time. Whenever the
                 contents of a node are lost, for instance due to a hard
                 disk crash, the system regenerates the data blocks
                 stored before the failure by transferring them from the
                 remaining replicas. This paper is focused on the
                 analysis of the efficiency of replication mechanism
                 that determines the location of the copies of a given
                 file at some server. The variability of the loads of
                 the nodes of the network is investigated for several
                 policies. Three replication mechanisms are tested
                 against simulations in the context of a real
                 implementation of such a system: Random, Least Loaded
                 and Power of Choice. The simulations show that some of
                 these policies may lead to quite unbalanced situations:
                 if $ \beta $ is the average number of copies per node
                 it turns out that, at equilibrium, the load of the
                 nodes may exhibit a high variability. It is shown in
                 this paper that a simple variant of a power of choice
                 type algorithm has a striking effect on the loads of
                 the nodes: at equilibrium, the distribution of the load
                 of a node has a bounded support, most of nodes have a
                 load less than $ 2 \beta $ which is an interesting
                 property for the design of the storage space of these
                 systems. Stochastic models are introduced and
                 investigated to explain this interesting phenomenon.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 25 (June 2017): joint auto-scaling and load
%%% balancing for near-optimal service elasticity in large-scale
%%% distributed systems.
@Article{Mukherjee:2017:OSE,
  author =       "Debankur Mukherjee and Souvik Dhara and Sem C. Borst
                 and Johan S. H. van Leeuwaarden",
  title =        "Optimal Service Elasticity in Large-Scale Distributed
                 Systems",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084463",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084463",
  abstract =     "A fundamental challenge in large-scale cloud networks
                 and data centers is to achieve highly efficient server
                 utilization and limit energy consumption, while
                 providing excellent user-perceived performance in the
                 presence of uncertain and time-varying demand patterns.
                 Auto-scaling provides a popular paradigm for
                 automatically adjusting service capacity in response to
                 demand while meeting performance targets, and
                 queue-driven auto-scaling techniques have been widely
                 investigated in the literature. In typical data center
                 architectures and cloud environments however, no
                 centralized queue is maintained, and load balancing
                 algorithms immediately distribute incoming tasks among
                 parallel queues. In these distributed settings with
                 vast numbers of servers, centralized queue-driven
                 auto-scaling techniques involve a substantial
                 communication overhead and major implementation burden,
                 or may not even be viable at all. Motivated by the
                 above issues, we propose a joint auto-scaling and load
                 balancing scheme which does not require any global
                 queue length information or explicit knowledge of
                 system parameters, and yet provides provably
                 near-optimal service elasticity. We establish the
                 fluid-level dynamics for the proposed scheme in a
                 regime where the total traffic volume and nominal
                 service capacity grow large in proportion. The
                 fluid-limit results show that the proposed scheme
                 achieves asymptotic optimality in terms of
                 user-perceived delay performance as well as energy
                 consumption. Specifically, we prove that both the
                 waiting time of tasks and the relative energy portion
                 consumed by idle servers vanish in the limit. At the
                 same time, the proposed scheme operates in a
                 distributed fashion and involves only constant
                 communication overhead per task, thus ensuring
                 scalability in massive data center operations.
                 Extensive simulation experiments corroborate the
                 fluid-limit results, and demonstrate that the proposed
                 scheme can match the user performance and energy
                 consumption of state-of-the-art approaches that do take
                 full advantage of a centralized queue.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 26 (June 2017).  Fixes relative to the raw
%%% capture: the undefined LaTeX command `\ocirc` in the temperature
%%% ``55$^\ocirc $C'' is corrected to `\circ` (degree symbol), and the
%%% abstract typo ``designed-induced variation'' is corrected to
%%% ``design-induced variation'', matching the term used throughout
%%% the rest of the abstract and the title.
@Article{Lee:2017:DIL,
  author =       "Donghyuk Lee and Samira Khan and Lavanya Subramanian
                 and Saugata Ghose and Rachata Ausavarungnirun and
                 Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu",
  title =        "Design-Induced Latency Variation in Modern {DRAM}
                 Chips: Characterization, Analysis, and Latency
                 Reduction Mechanisms",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "26:1--26:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084464",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084464",
  abstract =     "Variation has been shown to exist across the cells
                 within a modern DRAM chip. Prior work has studied and
                 exploited several forms of variation, such as
                 manufacturing-process- or temperature-induced
                 variation. We empirically demonstrate a new form of
                 variation that exists within a real DRAM chip, induced
                 by the design and placement of different components in
                 the DRAM chip: different regions in DRAM, based on
                 their relative distances from the peripheral
                 structures, require different minimum access latencies
                 for reliable operation. In particular, we show that in
                 most real DRAM chips, cells closer to the peripheral
                 structures can be accessed much faster than cells that
                 are farther. We call this phenomenon design-induced
                 variation in DRAM. Our goals are to (i) understand
                 design-induced variation that exists in real,
                 state-of-the-art DRAM chips, (ii) exploit it to develop
                 low-cost mechanisms that can dynamically find and use
                 the lowest latency at which to operate a DRAM chip
                 reliably, and, thus, (iii) improve overall system
                 performance while ensuring reliable system operation.
                 To this end, we first experimentally demonstrate and
                 analyze design-induced variation in modern DRAM
                 devices by testing and characterizing 96 DIMMs (768
                 DRAM chips). Our characterization identifies DRAM
                 regions that are vulnerable to errors, if operated at
                 lower latency, and finds consistency in their locations
                 across a given DRAM chip generation, due to
                 design-induced variation. Based on our extensive
                 experimental analysis, we develop two mechanisms that
                 reliably reduce DRAM latency. First, DIVA Profiling
                 uses runtime profiling to dynamically identify the
                 lowest DRAM latency that does not introduce failures.
                 DIVA Profiling exploits design-induced variation and
                 periodically profiles only the vulnerable regions to
                 determine the lowest DRAM latency at low cost. It is
                 the first mechanism to dynamically determine the lowest
                 latency that can be used to operate DRAM reliably. DIVA
                 Profiling reduces the latency of read/write requests by
                 35.1\% / 57.8\%, respectively, at 55$^\circ $C. Our
                 second mechanism, DIVA Shuffling, shuffles data such
                 that values stored in vulnerable regions are mapped to
                 multiple error-correcting code (ECC) codewords. As a
                 result, DIVA Shuffling can correct 26\% more multi-bit
                 errors than conventional ECC. Combined together, our
                 two mechanisms reduce read/write latency by
                 40.0\%/60.5\%, which translates to an overall system
                 performance improvement of 14.7\%/13.7\%/13.8\% (in
                 2-/4-/8-core systems) across a variety of workloads,
                 while ensuring reliable operation.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}

%%% POMACS 1(1), article 27 (June 2017): low-complexity distributed
%%% cooperative caching with geographic constraints, via potential
%%% games and best-response dynamics.
@Article{Avrachenkov:2017:LCA,
  author =       "Konstantin Avrachenkov and Jasper Goseling and Berksan
                 Serbetci",
  title =        "A Low-Complexity Approach to Distributed Cooperative
                 Caching with Geographic Constraints",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "27:1--27:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084465",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084465",
  abstract =     "We consider caching in cellular networks in which each
                 base station is equipped with a cache that can store a
                 limited number of files. The popularity of the files is
                 known and the goal is to place files in the caches such
                 that the probability that a user at an arbitrary
                 location in the plane will find the file that she
                 requires in one of the covering caches is maximized. We
                 develop distributed asynchronous algorithms for
                 deciding which contents to store in which cache. Such
                 cooperative algorithms require communication only
                 between caches with overlapping coverage areas and can
                 operate in asynchronous manner. The development of the
                 algorithms is principally based on an observation that
                 the problem can be viewed as a potential game. Our
                 basic algorithm is derived from the best response
                 dynamics. We demonstrate that the complexity of each
                 best response step is independent of the number of
                 files, linear in the cache capacity and linear in the
                 maximum number of base stations that cover a certain
                 area. Then, we show that the overall algorithm
                 complexity for a discrete cache placement is polynomial
                 in both network size and catalog size. In practical
                 examples, the algorithm converges in just a few
                 iterations. Also, in most cases of interest, the basic
                 algorithm finds the best Nash equilibrium corresponding
                 to the global optimum. We provide two extensions of our
                 basic algorithm based on stochastic and deterministic
                 simulated annealing which find the global optimum.
                 Finally, we demonstrate the hit probability evolution
                 on real and synthetic networks numerically and show
                 that our distributed caching algorithm performs
                 significantly better than storing the most popular
                 content, probabilistic content placement policy and
                 Multi-LRU caching policies.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}