%%% Valid HTML 4.0! Valid CSS!
%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.02",
%%%     date            = "21 September 2019",
%%%     time            = "07:26:07 MDT",
%%%     filename        = "tompecs.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "55198 4205 24203 225862",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on Modeling and Performance
%%%                        Evaluation of Computing Systems (TOMPECS);
%%%                        bibliography; BibTeX",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Modeling and Performance
%%%                        Evaluation of Computing Systems (TOMPECS)
%%%                        (CODEN ????, ISSN 2376-3639 (print),
%%%                        2376-3647 (electronic)).  The journal appears
%%%                        quarterly, and publication began with volume
%%%                        1, number 1, in March 2016.
%%%
%%%                        At version 1.02, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2016 (  26)    2018 (  22)
%%%                             2017 (  17)    2019 (  18)
%%%
%%%                             Article:         83
%%%
%%%                             Total entries:   83
%%%
%%%                        The journal Web page can be found at:
%%%
%%%                            http://tompecs.acm.org/
%%%
%%%                        The journal table of contents page is at:
%%%
%%%                            http://dl.acm.org/pub.cfm?id=J1525
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
%%% Preamble: loads the BibNet Project names style file and defines the
%%% \TM macro (superscript small-caps trademark sign) for use in fields.
@Preamble{"\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
%%% Bibliographer contact block; every entry below references it via
%%% ``acknowledgement = ack-nhfb''.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
%%% Full journal name; entries reference it via ``journal = j-TOMPECS''.
@String{j-TOMPECS               = "ACM Transactions on Modeling and Performance
                                  Evaluation of Computing Systems (TOMPECS)"}

%%% ====================================================================
%%% Bibliography entries:
%%% v1(1), article 1, March 2016: editorial introduction.
@Article{Towsley:2016:I,
  author =       "Don Towsley and Carey Williamson",
  title =        "Introduction",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:1",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2893179",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:29:10 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2893179",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(1), article 2, March 2016.
@Article{Lashgar:2016:ESM,
  author =       "Ahmad Lashgar and Amirali Baniasadi",
  title =        "Employing Software-Managed Caches in {OpenACC}:
                 Opportunities and Benefits",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:34",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2798724",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:29:10 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2798724",
  abstract =     "The OpenACC programming model has been developed to
                 simplify accelerator programming and improve
                 development productivity. In this article, we
                 investigate the main limitations faced by OpenACC in
                 harnessing all capabilities of GPU-like accelerators.
                 We build on our findings and discuss the opportunity to
                 exploit a software-managed cache as (i) a fast
                 communication medium and (ii) a cache for data reuse.
                 To this end, we propose a new directive and
                 communication model for OpenACC. Investigating several
                 benchmarks, we show that the proposed directive can
                 improve performance up to $ 2.54 \times $, and at the
                 cost of minor programming effort.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(1), article 3, March 2016.
@Article{Zhang:2016:VSL,
  author =       "Deli Zhang and Jeremiah Wilke and Gilbert Hendry and
                 Damian Dechev",
  title =        "Validating the Simulation of Large-Scale Parallel
                 Applications Using Statistical Characteristics",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:22",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2809778",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:29:10 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2809778",
  abstract =     "Simulation is a widely adopted method to analyze and
                 predict the performance of large-scale parallel
                 applications. Validating the hardware model is highly
                 important for complex simulations with a large number
                 of parameters. Common practice involves calculating the
                 percent error between the projected and the real
                 execution time of a benchmark program. However, in a
                 high-dimensional parameter space, this coarse-grained
                 approach often suffers from parameter insensitivity,
                 which may not be known a priori. Moreover, the
                 traditional approach cannot be applied to the
                 validation of software models, such as application
                 skeletons used in online simulations. In this work, we
                 present a methodology and a toolset for validating both
                 hardware and software models by quantitatively
                 comparing fine-grained statistical characteristics
                 obtained from execution traces. Although statistical
                 information has been used in tasks like performance
                 optimization, this is the first attempt to apply it to
                 simulation validation. Our experimental results show
                 that the proposed evaluation approach offers
                 significant improvement in fidelity when compared to
                 evaluation using total execution time, and the proposed
                 metrics serve as reliable criteria that progress toward
                 automating the simulation tuning process.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(1), article 4, March 2016.
@Article{Roos:2016:DDE,
  author =       "Stefanie Roos and Thorsten Strufe",
  title =        "Dealing with Dead Ends: Efficient Routing in
                 Darknets",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:30",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2809779",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:29:10 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2809779",
  abstract =     "Darknets, membership-concealing peer-to-peer networks,
                 suffer from high message delivery delays due to
                 insufficient routing strategies. They form topologies
                 restricted to a subgraph of the social network of their
                 users by limiting connections to peers with a mutual
                 trust relationship in real life. Whereas centralized,
                 highly successful social networking services entail a
                 privacy loss of their users, Darknets at higher
                 performance represent an optimal private and
                 censorship-resistant communication substrate for social
                 applications. Decentralized routing so far has been
                 analyzed under the assumption that the network
                 resembles a perfect lattice structure. Freenet,
                 currently the only widely used Darknet, attempts to
                 approximate this structure by embedding the social
                 graph into a metric space. Considering the resulting
                 distortion, the common greedy routing algorithm is
                 adapted to account for local optima. Yet the impact of
                 the adaptation has not been adequately analyzed. We
                 thus suggest a model integrating inaccuracies in the
                 embedding. In the context of this model, we show that
                 the Freenet routing algorithm cannot achieve polylog
                 performance. Consequently, we design NextBestOnce, a
                 provable polylog algorithm based only on information
                 about neighbors. Furthermore, we show that the routing
                 length of NextBestOnce is further decreased by more
                 than a constant factor if neighbor-of-neighbor
                 information is included in the decision process.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(1), article 5, March 2016.
@Article{Izagirre:2016:STA,
  author =       "A. Izagirre and U. Ayesta and I. M. Verloop",
  title =        "Sojourn Time Approximations for a Discriminatory
                 Processor Sharing Queue",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:31",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2812807",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:29:10 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2812807",
  abstract =     "We study a multiclass time-sharing discipline with
                 relative priorities known as discriminatory processor
                 sharing (DPS), which provides a natural framework to
                 model service differentiation in systems. The analysis
                 of DPS is extremely challenging, and analytical results
                 are scarce. We develop closed-form approximations for
                 the mean conditional (on the service requirement) and
                 unconditional sojourn times. The main benefits of the
                 approximations lie in its simplicity, the fact that it
                 applies for general service requirements with finite
                 second moments, and that it provides insights into the
                 dependency of the performance on the system parameters.
                 We show that the approximation for the mean conditional
                 and unconditional sojourn time of a customer is
                 decreasing as its relative priority increases. We also
                 show that the approximation is exact in various
                 scenarios, and that it is uniformly bounded in the
                 second moments of the service requirements. Finally, we
                 numerically illustrate that the approximation for
                 exponential, hyperexponential, and Pareto service
                 requirements is accurate across a broad range of
                 parameters.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(2), article 6, June 2016.
@Article{Harrison:2016:EPT,
  author =       "Peter G. Harrison and Naresh M. Patel and William J.
                 Knottenbelt",
  title =        "Energy--Performance Trade-Offs via the {EP} Queue",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "2",
  pages =        "6:1--6:31",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2818726",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2818726",
  abstract =     "We introduce the EP queue -- a significant
                 generalization of the $ M^B / G / 1 $ queue that has
                 state-dependent service time probability distributions
                 and incorporates power-up for first arrivals and
                 power-down for idle periods. We derive exact results
                 for the busy-time and response-time distributions. From
                 these, we derive power consumption metrics during
                 nonidle periods and overall response time metrics,
                 which together provide a single measure of the
                 trade-off between energy and performance. We illustrate
                 these trade-offs for some policies and show how
                 numerical results can provide insights into system
                 behavior. The EP queue has application to storage
                 systems, especially hard disks, and other data-center
                 components such as compute servers, networking, and
                 even hyperconverged infrastructure.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(2), article 7, June 2016.
@Article{Tavakkol:2016:PED,
  author =       "Arash Tavakkol and Pooyan Mehrvarzy and Mohammad
                 Arjomand and Hamid Sarbazi-Azad",
  title =        "Performance Evaluation of Dynamic Page Allocation
                 Strategies in {SSDs}",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:33",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2829974",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2829974",
  abstract =     "Solid-state drives (SSDs) with tens of NAND flash
                 chips and highly parallel architectures are widely used
                 in enterprise and client storage systems. As any write
                 operation in NAND flash is preceded by a slow erase
                 operation, an out-of-place update mechanism is used to
                 distribute writes through SSD storage space to postpone
                 erase operations as far as possible. SSD controllers
                 use a mapping table along with a specific allocation
                 strategy to map logical host addresses to physical page
                 addresses within storage space. The allocation strategy
                 is further responsible for accelerating I/O operations
                 through better striping of physical addresses over SSD
                 parallel resources. Proposals already exist for using
                 static logical-to-physical address mapping that does
                 not balance the I/O traffic load within the SSD, and
                 its efficiency highly depends on access patterns. A
                 more balanced distribution of I/O operations is to
                 alternate resource allocation in a round-robin manner
                 irrespective of logical addresses. The number of
                 resources that can be dynamically allocated in this
                 fashion is defined as the degree of freedom, and to the
                 best of our knowledge, there has been no research thus
                 far to show what happens if different degrees of
                 freedom are used in allocation strategy. This article
                 explores the possibility of using dynamic resource
                 allocation and identifies key design opportunities that
                 it presents to improve SSD performance. Specifically,
                 using steady-state analysis of SSDs, we show that
                 dynamism helps to mitigate performance and endurance
                 overheads of garbage collection. Our steady-state
                 experiments indicate that midrange/high-end SSDs with
                 dynamic allocation can provide I/O operations per
                 second (IOPS) improvement of up to 3.3x/9.6x, response
                 time improvement of up to 56\%/32\%, and about
                 88\%/96\% average reduction in the standard deviation
                 of erase counts of NAND flash blocks.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(2), article 8, June 2016.
@Article{Chang:2016:CRA,
  author =       "Cheng-Shang Chang and Jay Cheng and Tien-Ke Huang and
                 Duan-Shin Lee and Cheng-Yu Chen",
  title =        "Coding Rate Analysis of Forbidden Overlap Codes in
                 High-Speed Buses",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:25",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2846091",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2846091",
  abstract =     "One of the main problems in deep submicron designs of
                 high-speed buses is propagation delay due to the
                 crosstalk effect. To alleviate the crosstalk effect,
                 there are several types of crosstalk avoidance codes
                 proposed in the literature. In this article, we analyze
                 the coding rates of forbidden overlap codes (FOCs) that
                 avoid ``010 $ \to $ 101'' transition and ``101 $ \to $
                 010'' transition on any three adjacent wires in a bus.
                 We first compute the maximum achievable coding rate of
                 FOCs and the maximum coding rate of memoryless FOCs.
                 Our numerical results show that there is a significant
                 gap between the maximum coding rate of memoryless FOCs
                 and the maximum achievable rate. We then analyze the
                 coding rates of FOCs generated from the bit-stuffing
                 algorithm. Our worst-case analysis yields a tight lower
                 bound of the coding rate of the bit-stuffing algorithm.
                 Under the assumption of Bernoulli inputs, we use a
                 Markov chain model to compute the coding rate of a bus
                 with n wires under the bit-stuffing algorithm. The main
                 difficulty of solving such a Markov chain model is that
                 the number of states grows exponentially with respect
                 to the number of wires n. To tackle the problem of the
                 curse of dimensionality, we derive an approximate
                 analysis that leads to a recursive closed-form formula
                 for the coding rate over the $n$th wire. Our
                 approximations match extremely well with the numerical
                 results from solving the original Markov chain for $ n
                 \leq 10 $ and the simulation results for $ n \leq 3000
                 $. Our analysis of coding rates of FOCs could be
                 helpful in understanding the trade-off between
                 propagation delay and coding rate among various
                 crosstalk avoidance codes in the literature. In
                 comparison with the forbidden transition codes (FTCs)
                 that have shorter propagation delay than that of FOCs,
                 our numerical results show that the coding rates of
                 FOCs are much higher than those of FTCs.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(2), article 9, June 2016.
@Article{Bermolen:2016:ETP,
  author =       "Paola Bermolen and Matthieu Jonckheere and Federico
                 Larroca and Pascal Moyal",
  title =        "Estimating the Transmission Probability in Wireless
                 Networks with Configuration Models",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "2",
  pages =        "9:1--9:23",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2858795",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2858795",
  abstract =     "We propose a new methodology to estimate the
                 probability of successful transmissions for random
                 access scheduling in wireless networks, in particular
                 those using Carrier Sense Multiple Access (CSMA).
                 Instead of focusing on spatial configurations of users,
                 we model the interference between users as a random
                 graph. Using configuration models for random graphs, we
                 show how the properties of the medium access mechanism
                 are captured by some deterministic differential
                 equations when the size of the graph gets large.
                 Performance indicators such as the probability of
                 connection of a given node can then be efficiently
                 computed from these equations. We also perform
                 simulations to illustrate the results on different
                 types of random graphs. Even on spatial structures,
                 these estimates get very accurate as soon as the
                 variance of the interference is not negligible.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(3), article 10, May 2016 (month as recorded in ACM metadata).
@Article{Yan:2016:PAF,
  author =       "Feng Yan and Xenia Mountrouidou and Alma Riska and
                 Evgenia Smirni",
  title =        "{PREFiguRE}: An Analytic Framework for {HDD}
                 Management",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "3",
  pages =        "10:1--10:27",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2872331",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2872331",
  abstract =     "Low disk drive utilization suggests that placing the
                 drive into a power saving mode during idle times may
                 decrease power consumption. We present PREFiguRE, a
                 robust framework that aims at harvesting future idle
                 intervals for power savings while meeting strict
                 quality constraints: first, it contains potential
                 delays in serving IO requests that occur during power
                 savings since the time to bring up the disk is not
                 negligible, and second, it ensures that the power
                 saving mechanism is triggered a few times only, such
                 that the disk wear-out due to powering up and down does
                 not compromise the disk's lifetime. PREFiguRE is based
                 on an analytic methodology that uses the histogram of
                 idle times to determine schedules for power saving
                 modes as a function of the preceding constraints.
                 PREFiguRE facilitates analysis for the evaluation of
                 the trade-offs between power savings and quality
                 targets for the current workload. Extensive
                 experimentation on a set of enterprise storage traces
                 illustrates PREFiguRE's effectiveness to consistently
                 achieve high power savings without undermining disk
                 reliability and performance.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% v1(3), article 11, May 2016.
@Article{Antunes:2016:EFD,
  author =       "Nelson Antunes and Vladas Pipiras",
  title =        "Estimation of Flow Distributions from Sampled
                 Traffic",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "3",
  pages =        "11:1--11:28",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2891106",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2891106",
  abstract =     "This work addresses the problem of estimating the
                 distributions of packet flow sizes and durations under
                 several methods of sampling packets. Two approaches,
                 one based on inversion and the other on asymptotics,
                 are considered. For the duration distribution, in
                 particular, both approaches require modeling the
                 structure of flows, with the duration distribution
                 being characterized in terms of the IATs (interarrival
                 times between packets) and size distributions of a
                 flow. The inversion of the flow IAT distribution from
                 sampled flow quantities, along with the inversion of
                 the flow size distribution (already used in the
                 literature) allows estimating the flow duration
                 distribution. Motivated by the limitations of the
                 inversion approach in estimating the distribution tails
                 for some sampling methods, an asymptotic approach is
                 developed to estimate directly the distribution tails
                 of flow durations and sizes from sampled quantities.
                 The adequacy of both approaches to estimate the flow
                 distributions is checked against two real Internet
                 traces.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Garetto:2016:UAP,
  author =       "Michele Garetto and Emilio Leonardi and Valentina
                 Martina",
  title =        "A Unified Approach to the Performance Analysis of
                 Caching Systems",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "3",
  pages =        "12:1--12:28",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2896380",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2896380",
  abstract =     "We propose a unified methodology to analyze the
                 performance of caches (both isolated and
                 interconnected), by extending and generalizing a
                 decoupling technique originally known as Che's
                 approximation, which provides very accurate results at
                 low computational cost. We consider several caching
                 policies (including a very attractive one, called
                 k-LRU), taking into account the effects of temporal
                 locality. In the case of interconnected caches, our
                 approach allows us to do better than the Poisson
                 approximation commonly adopted in prior work. Our
                 results, validated against simulations and trace-driven
                 experiments, provide interesting insights into the
                 performance of caching systems.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Xie:2016:DAI,
  author =       "Hong Xie and John C. S. Lui and Don Towsley",
  title =        "Design and Analysis of Incentive and Reputation
                 Mechanisms for Online Crowdsourcing Systems",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "3",
  pages =        "13:1--13:27",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2897510",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:54 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2897510",
  abstract =     "Today, online crowdsourcing services like Amazon
                 Mechanical Turk, UpWork, and Yahoo! Answers are gaining
                 in popularity. For such online services, it is
                 important to attract ``workers'' to provide
                 high-quality solutions to the ``tasks'' outsourced by
                 ``requesters.'' The challenge is that workers have
                 different skill sets and can provide different amounts
                 of effort. In this article, we design a class of
                 incentive and reputation mechanisms to solicit
                 high-quality solutions from workers. Our incentive
                 mechanism allows multiple workers to solve a task,
                 splits the reward among workers based on requester
                 evaluations of the solution quality, and guarantees
                 that high-skilled workers provide high-quality
                 solutions. However, our incentive mechanism suffers the
                 potential risk that a requester will eventually
                 collect low-quality solutions due to fundamental
                 limitations in task assigning accuracy. Our reputation
                 mechanism ensures that low-skilled workers do not
                 provide low-quality solutions by tracking workers'
                 historical contributions and penalizing those workers
                 having poor reputations. We show that by coupling our
                 reputation mechanism with our incentive mechanism, a
                 requester can collect at least one high-quality
                 solution. We present an optimization framework to
                 select parameters for our reputation mechanism. We show
                 that there is a trade-off between system efficiency
                 (i.e., the number of tasks that can be solved for a
                 given reward) and revenue (i.e., the amount of
                 transaction fees), and we present the optimal trade-off
                 curve between system efficiency and revenue. We
                 demonstrate the applicability and effectiveness of our
                 mechanisms through experiments using a real-world
                 dataset from UpWork. We infer model parameters from
                 this data, use them to determine proper rewards, and
                 select the parameters of our incentive and reputation
                 mechanisms for UpWork. Experimental results show that
                 our incentive and reputation mechanisms achieve 98.82\%
                 of the maximum system efficiency while only sacrificing
                 4\% of revenue.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@article{Ghaderi:2016:SSS,
  author =       {Javad Ghaderi and Sanjay Shakkottai and R. Srikant},
  title =        {Scheduling Storms and Streams in the Cloud},
  journal =      j-TOMPECS,
  volume =       {1},
  number =       {4},
  pages =        {14:1--14:28},
  month =        sep,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2904080},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2904080},
  abstract =     {Motivated by emerging big streaming data processing
                 paradigms (e.g., Twitter Storm, Streaming MapReduce),
                 we investigate the problem of scheduling graphs over a
                 large cluster of servers. Each graph is a job, where
                 nodes represent compute tasks and edges indicate data
                 flows between these compute tasks. Jobs (graphs) arrive
                 randomly over time and, upon completion, leave the
                 system. When a job arrives, the scheduler needs to
                 partition the graph and distribute it over the servers
                 to satisfy load balancing and cost considerations.
                 Specifically, neighboring compute tasks in the graph
                 that are mapped to different servers incur load on the
                 network; thus a mapping of the jobs among the servers
                 incurs a cost that is proportional to the number of
                 ``broken edges.'' We propose a low-complexity
                 randomized scheduling algorithm that, without service
                 preemptions, stabilizes the system with graph
                 arrivals/departures; more importantly, it allows a
                 smooth tradeoff between minimizing average partitioning
                 cost and average queue lengths. Interestingly, to avoid
                 service preemptions, our approach does not rely on a
                 Gibbs sampler; instead, we show that the corresponding
                 limiting invariant measure has an interpretation
                 stemming from a loss system.},
  acknowledgement = ack-nhfb,
  articleno =    {14},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@article{Papadopoulos:2016:PPE,
  author =       {Alessandro Vittorio Papadopoulos and Ahmed Ali-Eldin
                 and Karl-Erik {\AA}rz{\'e}n and Johan Tordsson and Erik
                 Elmroth},
  title =        {{PEAS}: A Performance Evaluation Framework for
                 Auto-Scaling Strategies in Cloud Applications},
  journal =      j-TOMPECS,
  volume =       {1},
  number =       {4},
  pages =        {15:1--15:31},
  month =        sep,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2930659},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2930659},
  abstract =     {Numerous auto-scaling strategies have been proposed in
                 the past few years for improving various Quality of
                 Service (QoS) indicators of cloud applications, for
                 example, response time and throughput, by adapting the
                 amount of resources assigned to the application to meet
                 the workload demand. However, the evaluation of a
                 proposed auto-scaler is usually achieved through
                 experiments under specific conditions and seldom
                 includes extensive testing to account for uncertainties
                 in the workloads and unexpected behaviors of the
                 system. These tests by no means can provide guarantees
                 about the behavior of the system in general conditions.
                 In this article, we present a Performance Evaluation
                 framework for Auto-Scaling (PEAS) strategies in the
                 presence of uncertainties. The evaluation is formulated
                 as a chance constrained optimization problem, which is
                 solved using scenario theory. The adoption of such a
                 technique allows one to give probabilistic guarantees
                 of the obtainable performance. Six different
                 auto-scaling strategies have been selected from the
                 literature for extensive test evaluation and compared
                 using the proposed framework. We build a discrete event
                 simulator and parameterize it based on real
                 experiments. Using the simulator, each auto-scaler's
                 performance is evaluated using 796 distinct real
                 workload traces from projects hosted on the Wikimedia
                 foundations' servers, and their performance is compared
                 using PEAS. The evaluation is carried out using
                 different performance metrics, highlighting the
                 flexibility of the framework, while providing
                 probabilistic bounds on the evaluation and the
                 performance of the algorithms. Our results highlight
                 the problem of generalizing the conclusions of the
                 original published studies and show that based on the
                 evaluation criteria, a controller can be shown to be
                 better than other controllers.},
  acknowledgement = ack-nhfb,
  articleno =    {15},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@article{Fricker:2016:AOS,
  author =       {Christine Fricker and Fabrice Guillemin and Philippe
                 Robert and Guilherme Thompson},
  title =        {Analysis of an Offloading Scheme for Data Centers in
                 the Framework of Fog Computing},
  journal =      j-TOMPECS,
  volume =       {1},
  number =       {4},
  pages =        {16:1--16:18},
  month =        sep,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2950047},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2950047},
  abstract =     {In the context of fog computing, we consider a simple
                 case where data centers are installed at the edge of
                 the network and assume that if a request arrives at an
                 overloaded data center, then it is forwarded to a
                 neighboring data center with some probability. Data
                 centers are assumed to have a large number of servers,
                 and traffic at some of them is assumed to cause
                 saturation. In this case, the other data centers may
                 help to cope with this saturation regime by accepting
                 some of the rejected requests. Our aim is to
                 qualitatively estimate the gain achieved via
                 cooperation between neighboring data centers. After
                 proving some convergence results related to the scaling
                 limits of loss systems for the process describing the
                 number of free servers at both data centers, we show
                 that the performance of the system can be expressed in
                 terms of the invariant distribution of a random walk in
                 the quarter plane. By using and developing existing
                 results in the technical literature, explicit formulas
                 for the blocking rates of such a system are derived.},
  acknowledgement = ack-nhfb,
  articleno =    {16},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@Article{DeCicco:2016:CTA,
  author =       "Luca {De Cicco} and Yixi Gong and Dario Rossi and
                 Emilio Leonardi",
  title =        "A Control-Theoretic Analysis of Low-Priority
                 Congestion Control Reprioritization under {AQM}",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "4",
  pages =        "17:1--17:33",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2934652",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:55 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2934652",
  abstract =     "Recently, a negative interplay has been shown to arise
                 when scheduling/Active Queue Management (AQM)
                 techniques and low-priority congestion control
                 protocols are used together; namely, AQM resets the
                 relative level of priority among congestion control
                 protocols. This work explores this issue by carrying
                 out a control-theoretic analysis of the dynamical
                 system to prove some fundamental properties that fully
                 characterize the reprioritization phenomenon. In
                 particular, (i) we provide the closed-form solution of
                 the equilibrium in the open loop (i.e., fixing a target
                 loss probability p); (ii) we provide a stability
                 analysis and a characterization of the reprioritization
                 phenomenon when closing the loop with AQM (i.e., that
                 dynamically adjusts the system loss probability). Our
                 results are important as the characterization of the
                 reprioritization phenomenon is not only quantitatively
                 accurate for the specific protocols and AQM considered
                 but also qualitatively accurate for a broader range of
                 congestion control protocol and AQM combinations.
                 Finally, while we find a sufficient condition to avoid
                 the reprioritization phenomenon, we also show, at the
                 same time, such conditions to be likely impractical:
                 Therefore, we propose a simple and practical
                 system-level solution that is able to reinstate
                 priorities among protocols.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@article{Zhang:2016:TIM,
  author =       {Linquan Zhang and Shaolei Ren and Chuan Wu and
                 Zongpeng Li},
  title =        {A Truthful Incentive Mechanism for Emergency Demand
                 Response in Geo-Distributed Colocation Data Centers},
  journal =      j-TOMPECS,
  volume =       {1},
  number =       {4},
  pages =        {18:1--18:23},
  month =        sep,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2950046},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2950046},
  abstract =     {Data centers are key participants in demand response
                 programs, including emergency demand response (EDR), in
                 which the grid coordinates consumers of large amounts
                 of electricity for demand reduction in emergency
                 situations to prevent major economic losses. While
                 existing literature concentrates on owner-operated data
                 centers, this work studies EDR in geo-distributed
                 multitenant colocation data centers in which servers
                 are owned and managed by individual tenants. EDR in
                 colocation data centers is significantly more
                 challenging due to lack of incentives to reduce energy
                 consumption by tenants who control their servers and
                 are typically on fixed power contracts with the
                 colocation operator. Consequently, to achieve demand
                 reduction goals set by the EDR program, the operator
                 has to rely on the highly expensive and/or
                 environmentally unfriendly on-site energy
                 backup/generation. To reduce cost and environmental
                 impact, an efficient incentive mechanism is therefore
                 needed, motivating tenants' voluntary energy reduction
                 in the case of EDR. This work proposes a novel
                 incentive mechanism, Truth-DR, which leverages a
                 reverse auction to provide monetary remuneration to
                 tenants according to their agreed energy reduction.
                 Truth-DR is computationally efficient, truthful, and
                 achieves 2-approximation in colocation-wide social
                 cost. Trace-driven simulations verify the efficacy of
                 the proposed auction mechanism.},
  acknowledgement = ack-nhfb,
  articleno =    {18},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@Article{Fu:2016:FPP,
  author =       "Yongquan Fu and Ernst Biersack",
  title =        "False-Positive Probability and Compression
                 Optimization for Tree-Structured {Bloom} Filters",
  journal =      j-TOMPECS,
  volume =       "1",
  number =       "4",
  pages =        "19:1--19:39",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2940324",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:55 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2940324",
  abstract =     "Bloom filters are frequently used to check the
                 membership of an item in a set. However, Bloom filters
                 face a dilemma: the transmission bandwidth and the
                 accuracy cannot be optimized simultaneously. This
                 dilemma is particularly severe for transmitting Bloom
                 filters to remote nodes when the network bandwidth is
                 limited. We propose a novel Bloom filter called
                 BloomTree that consists of a tree-structured
                 organization of smaller Bloom filters, each using a set
                 of independent hash functions. BloomTree spreads items
                 across levels that are compressed to reduce the
                 transmission bandwidth need. We show how to find
                 optimal configurations for BloomTree and investigate in
                 detail by how much BloomTree outperforms the standard
                 Bloom filter or the compressed Bloom filter. Finally,
                 we use the intersection of BloomTrees to predict the
                 set intersection, decreasing the false-positive
                 probabilities by several orders of magnitude compared
                 to both the compressed Bloom filter and the standard
                 Bloom filter.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@article{Anonymous:2016:LR,
  author =       {Anonymous},
  title =        {List of Reviewers},
  journal =      j-TOMPECS,
  volume =       {1},
  number =       {4},
  pages =        {20:1--20:2},
  month =        sep,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2989212},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2989212},
  acknowledgement = ack-nhfb,
  articleno =    {20},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@article{Molka:2016:CAW,
  author =       {Karsten Molka and Giuliano Casale},
  title =        {Contention-Aware Workload Placement for In-Memory
                 Databases in Cloud Environments},
  journal =      j-TOMPECS,
  volume =       {2},
  number =       {1},
  pages =        {1:1--1:29},
  month =        nov,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2961888},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2961888},
  abstract =     {Big data processing is driven by new types of
                 in-memory database systems. In this article, we apply
                 performance modeling to efficiently optimize workload
                 placement for such systems. In particular, we propose
                 novel response time approximations for in-memory
                 databases based on fork-join queuing models and
                 contention probabilities to model variable threading
                 levels and per-class memory occupation under analytical
                 workloads. We combine these approximations with a
                 nonlinear optimization methodology that seeks optimal
                 load dispatching probabilities in order to minimize
                 memory swapping and resource utilization. We compare
                 our approach with state-of-the-art response time
                 approximations using real data from an SAP HANA
                 in-memory system and show that our models markedly
                 improve accuracy over existing approaches, at similar
                 computational costs.},
  acknowledgement = ack-nhfb,
  articleno =    {1},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@article{Vergara:2016:FIC,
  author =       {Ekhiotz Jon Vergara and Simin Nadjm-Tehrani and Mikael
                 Asplund},
  title =        {Fairness and Incentive Considerations in Energy
                 Apportionment Policies},
  journal =      j-TOMPECS,
  volume =       {2},
  number =       {1},
  pages =        {2:1--2:29},
  month =        nov,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2970816},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2970816},
  abstract =     {The energy consumption of a system is determined by
                 the system component usage patterns and interactions
                 between the coexisting entities and resources. Energy
                 accounting plays an essential role in revealing the
                 contribution of each entity to the total consumption
                 and for energy management. Unfortunately, energy
                 accounting inherits the apportionment problem of
                 accounting in general, which does not have a general
                 single best solution. In this article, we leverage
                 cooperative game theory, which is commonly used in cost
                 allocation problems to study the energy apportionment
                 problem, that is, the problem of prescribing the actual
                 energy consumption of a system to the consuming
                 entities (e.g., applications, processes, or users of
                 the system). We identify five relevant fairness
                 properties for energy apportionment and present a
                 detailed categorisation and analysis of eight
                 previously proposed energy apportionment policies from
                 different fields in computer and communication systems.
                 In addition, we propose two novel energy apportionment
                 policies based on cooperative game theory that provide
                 strong fairness notion and a rich incentive structure.
                 Our comparative analysis in terms of the identified
                 five fairness properties as well as information
                 requirement and computational complexity shows that
                 there is a tradeoff between fairness and the other
                 evaluation criteria. We provide guidelines to select an
                 energy apportionment policy depending on the purpose of
                 the apportionment and the characteristics of the
                 system.},
  acknowledgement = ack-nhfb,
  articleno =    {2},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@article{Nasiriani:2016:FAC,
  author =       {Neda Nasiriani and Cheng Wang and George Kesidis and
                 Bhuvan Urgaonkar and Lydia Y. Chen and Robert Birke},
  title =        {On Fair Attribution of Costs Under Peak-Based Pricing
                 to Cloud Tenants},
  journal =      j-TOMPECS,
  volume =       {2},
  number =       {1},
  pages =        {3:1--3:28},
  month =        nov,
  year =         {2016},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2970815},
  issn =         {2376-3639 (print), 2376-3647 (electronic)},
  issn-l =       {2376-3639},
  bibdate =      {Thu Jun 15 12:19:55 MDT 2017},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url =          {http://dl.acm.org/citation.cfm?id=2970815},
  abstract =     {The costs incurred by cloud providers towards
                 operating their data centers are often determined in
                 large part by their peak demands. The pricing schemes
                 currently used by cloud providers to recoup these costs
                 from their tenants, however, do not distinguish tenants
                 based on their contributions to the cloud's overall
                 peak demand. Using the concrete example of peak-based
                 pricing as employed by many electric utility companies,
                 we show that this ``gap'' may lead to unfair
                 attribution of costs to the tenants. Simple
                 enhancements of existing cloud pricing (e.g., analogous
                 to the coincident peak pricing (CPP) used by some
                 electric utilities) do not adequately address these
                 shortcomings and suffer from short-term unfairness and
                 undesirable oscillatory price-vs.-demand relationships
                 offered to tenants. To overcome these shortcomings, we
                 define an alternative pricing scheme to more fairly
                 distribute a cloud's costs among its tenants. We
                 demonstrate the efficacy of our scheme under
                 price-sensitive tenant demand response using a
                 combination of (i) extensive empirical evaluation with
                 recent workloads from commercial data centers operated
                 by IBM and (ii) analytical [modeling] through
                 non-cooperative game theory for a special case of
                 tenant demand model.},
  acknowledgement = ack-nhfb,
  articleno =    {3},
  fjournal =     {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-url =  {http://dl.acm.org/pub.cfm?id=J1525},
}

@Article{Nain:2016:FDD,
  author =       "Philippe Nain and Don Towsley",
  title =        "File Dissemination in Dynamic Graphs: The Case of
                 Independent and Correlated Links in Series",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "1",
  pages =        "4:1--4:23",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2981344",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:55 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2981344",
  abstract =     "In this article, we investigate the traversal time of
                 a file across N communication links subject to
                 stochastic changes in the sending rate of each link.
                 Each link's sending rate is modeled by a finite-state
                 Markov process. Two cases, one where links evolve
                 independently of one another (N mutually independent
                 Markov processes) and the second where their behaviors
                 are dependent (these N Markov processes are not
                 mutually independent), are considered. A particular
                 instance where the above is encountered is ad hoc
                 delay/tolerant networks where links are subject to
                 intermittent unavailability.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Liu:2016:SSS,
  author =       "Qingyun Liu and Xiaohan Zhao and Walter Willinger and
                 Xiao Wang and Ben Y. Zhao and Haitao Zheng",
  title =        "Self-Similarity in Social Network Dynamics",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "1",
  pages =        "5:1--5:26",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2994142",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:55 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2994142",
  abstract =     "Analyzing and modeling social network dynamics are key
                 to accurately predicting resource needs and system
                 behavior in online social networks. The presence of
                 statistical scaling properties, that is,
                 self-similarity, is critical for determining how to
                 model network dynamics. In this work, we study the role
                 that self-similarity scaling plays in a social network
                 edge creation (that is, links created between users)
                 process, through analysis of two detailed, time-stamped
                 traces, a 199 million edge trace over 2 years in the
                 Renren social network, and 876K interactions in a
                 4-year trace of Facebook. Using wavelet-based analysis,
                 we find that the edge creation process in both networks
                 is consistent with self-similarity scaling, once we
                 account for periodic user activity that makes edge
                 creation process non-stationary. Using these findings,
                 we build a complete model of social network dynamics
                 that combines temporal and spatial components.
                 Specifically, the temporal behavior of our model
                 reflects self-similar scaling properties, and accounts
                 for certain deterministic non-stationary features. The
                 spatial side accounts for observed long-term graph
                 properties, such as graph distance shrinkage and local
                 declustering. We validate our model against network
                 dynamics in Renren and Facebook datasets, and show that
                 it succeeds in producing desired properties in both
                 temporal patterns and graph structural features.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Krishnasamy:2016:DSR,
  author =       "Subhashini Krishnasamy and Rajat Sen and Sanjay
                 Shakkottai and Sewoong Oh",
  title =        "Detecting Sponsored Recommendations",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "1",
  pages =        "6:1--6:29",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2988543",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:55 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2988543",
  abstract =     "With the vast number of items, Web pages, and news
                 from which to choose, online services and customers
                 both benefit tremendously from personalized recommender
                 systems. Such systems additionally provide great
                 opportunities for targeted advertisements by displaying
                 ads alongside genuine recommendations. We consider a
                 biased recommendation system in which such ads are
                 displayed without any tags (disguised as genuine
                 recommendations), rendering them indistinguishable to a
                 single user. We ask whether it is possible for a small
                 subset of collaborating users to detect such bias. We
                 propose an algorithm that can detect this type of bias
                 through statistical analysis on the collaborating
                 users' feedback. The algorithm requires only binary
                 information indicating whether a user was satisfied
                 with each of the recommended item or not. This makes
                 the algorithm widely appealing to real-world issues
                 such as identification of search engine bias and
                 pharmaceutical lobbying. We prove that the proposed
                 algorithm detects the bias with high probability for a
                 broad class of recommendation systems when a sufficient
                 number of users provides feedback on a sufficient
                 number of recommendations. We provide extensive
                 simulations with real datasets and practical
                 recommender systems, which confirm the trade-offs in
                 the theoretical guarantees.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Petsas:2017:MMA,
  author =       "Thanasis Petsas and Antonis Papadogiannakis and
                 Michalis Polychronakis and Evangelos P. Markatos and
                 Thomas Karagiannis",
  title =        "Measurement, Modeling, and Analysis of the Mobile App
                 Ecosystem",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "7:1--7:33",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2993419",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=2993419",
  abstract =     "Mobile applications (apps) have been gaining
                 popularity due to the advances in mobile technologies
                 and the large increase in the number of mobile users.
                 Consequently, several app distribution platforms, which
                 provide a new way for developing, downloading, and
                 updating software applications in modern mobile
                 devices, have recently emerged. To better understand
                 the download patterns, popularity trends, and
                 development strategies in this rapidly evolving mobile
                 app ecosystem, we systematically monitored and analyzed
                 four popular third-party Android app marketplaces. Our
                 study focuses on measuring, analyzing, and modeling the
                 app popularity distribution and explores how pricing
                 and revenue strategies affect app popularity and
                 developers' income. Our results indicate that unlike
                 web and peer-to-peer file sharing workloads, the app
                 popularity distribution deviates from commonly observed
                 Zipf-like models. We verify that these deviations can
                 be mainly attributed to a new download pattern, which
                 we refer to as the clustering effect. We validate the
                 existence of this effect by revealing a strong temporal
                 affinity of user downloads to app categories. Based on
                 these observations, we propose a new formal clustering
                 model for the distribution of app downloads and
                 demonstrate that it closely fits measured data.
                 Moreover, we observe that paid apps follow a different
                 popularity distribution than free apps and show how
                 free apps with an ad-based revenue strategy may result
                 in higher financial benefits than paid apps. We believe
                 that this study can be useful to appstore designers for
                 improving content delivery and recommendation systems,
                 as well as to app developers for selecting proper
                 pricing policies to increase their income.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Yi:2017:CDC,
  author =       "Xiaomeng Yi and Fangming Liu and Di Niu and Hai Jin
                 and John C. S. Lui",
  title =        "{Cocoa}: Dynamic Container-Based Group Buying
                 Strategies for Cloud Computing",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "8:1--8:31",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3022876",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3022876",
  abstract =     "Although the Infrastructure-as-a-Service (IaaS) cloud
                 offers diverse instance types to users, a significant
                 portion of cloud users, especially those with small and
                 short demands, cannot find an instance type that
                 exactly fits their needs or fully utilize purchased
                 instance-hours. In the meantime, cloud service
                 providers are also faced with the challenge to
                 consolidate small, short jobs, which exhibit strong
                 dynamics, to effectively improve resource utilization.
                 To handle such inefficiencies and improve cloud
                 resource utilization, we propose Cocoa (COmputing in
                 COntAiners), a novel group buying mechanism that
                 organizes jobs with complementary resource demands into
                 groups and allocates them to group buying deals
                 predefined by cloud providers. Each group buying deal
                 offers a resource pool for all the jobs in the deal,
                 which can be implemented as either a virtual machine or
                 a physical server. By running each user job on a
                 virtualized container, our mechanism allows flexible
                 resource sharing among different users in the same
                 group buying deal, while improving resource utilization
                 for cloud providers. To organize jobs with varied
                 resource demands and durations into groups, we model
                 the initial static group organization as a
                 variable-sized vector bin packing problem, and the
                 subsequent dynamic group organization problem as an
                 online multidimensional knapsack problem. Through
                 extensive simulations driven by a large amount of real
                 usage traces from a Google cluster, we evaluate the
                 potential cost reduction achieved by Cocoa. We show
                 that through the effective combination and interaction
                 of the proposed static and dynamic group organization
                 strategies, Cocoa greatly outperforms the existing
                 cloud workload consolidation mechanism, substantiating
                 the feasibility of group buying in cloud computing.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Liu:2017:ECE,
  author =       "Yu-Hang Liu and Xian-He Sun",
  title =        "Evaluating the Combined Effect of Memory Capacity and
                 Concurrency for Many-Core Chip Design",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "9:1--9:25",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3038915",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3038915",
  abstract =     "Modern memory systems are structured under hierarchy
                 and concurrency. The combined impact of hierarchy and
                 concurrency, however, is application dependent and
                 difficult to describe. In this article, we introduce
                 C$^2$-Bound, a data-driven analytical model that serves
                 the purpose of optimizing many-core design.
                 C$^2$-Bound considers both memory capacity and data access
                 concurrency. It utilizes the combined power of the
                 newly proposed latency model, concurrent average memory
                 access time, and the well-known memory-bounded speedup
                 model (Sun-Ni's law) to facilitate computing tasks.
                 Compared to traditional chip designs that lack the
                 notion of memory capacity and concurrency, the
                 C$^2$-Bound model finds that memory bound factors
                 significantly impact the optimal number of cores as
                 well as their optimal silicon area allocations,
                 especially for data-intensive applications with a
                 non-parallelizable sequential portion. Therefore, our
                 model is valuable to the design of next-generation
                 many-core architectures that target big data
                 processing, where working sets are usually larger than
                 the conventional scientific computing. These findings
                 are evidenced by our detailed simulations, which show,
                 with C$^2$-Bound, the design space of chip design can be
                 narrowed down significantly up to four orders of
                 magnitude. C$^2$-Bound analytic results can be either
                 used in reconfigurable hardware environments or, by
                 software designers, applied to scheduling,
                 partitioning, and allocating resources among diverse
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Simhon:2017:ARG,
  author =       "Eran Simhon and David Starobinski",
  title =        "Advance Reservation Games",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "10:1--10:21",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3053046",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3053046",
  abstract =     "Advance reservation (AR) services form a pillar of
                 several branches of the economy, including
                 transportation, lodging, dining, and, more recently,
                 cloud computing. In this work, we use game theory to
                 analyze a slotted AR system in which customers differ
                 in their lead times. For each given time slot, the
                 number of customers requesting service is a random
                 variable following a general probability distribution.
                 Based on statistical information, the customers decide
                 whether or not to make an advance reservation of server
                 resources in future slots for a fee. We prove that only
                 two types of equilibria are possible: either none of
                 the customers makes AR or only customers with lead time
                 greater than some threshold make AR. Our analysis
                 further shows that the fee that maximizes the
                 provider's profit may lead to other equilibria, one of
                 which yields zero profit. In order to prevent ending up
                 with no profit, the provider can elect to advertise a
                 lower fee yielding a guaranteed but smaller profit. We
                 refer to the ratio of the maximum possible profit to
                 the maximum guaranteed profit as the price of
                 conservatism. When the number of customers is a Poisson
                 random variable, we prove that the price of
                 conservatism is one in the single-server case, but can
                 be arbitrarily high in a many-server system.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Kelley:2017:OMA,
  author =       "Jaimie Kelley and Christopher Stewart and Nathaniel
                 Morris and Devesh Tiwari and Yuxiong He and Sameh
                 Elnikety",
  title =        "Obtaining and Managing Answer Quality for Online
                 Data-Intensive Services",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "11:1--11:31",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055280",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3055280",
  abstract =     "Online data-intensive (OLDI) services use anytime
                 algorithms to compute over large amounts of data and
                 respond quickly. Interactive response times are a
                 priority, so OLDI services parallelize query execution
                 across distributed software components and return best
                 effort answers based on the data so far processed.
                 Omitted data from slow components could lead to better
                 answers, but tracing online how much better the answers
                 could be is difficult. We propose Ubora, a design
                 approach to measure the effect of slow-running
                 components on the quality of answers. Ubora randomly
                 samples online queries and executes them a second time.
                 The first online execution omits data from slow
                 components and provides interactive answers. The second
                 execution uses mature results from intermediate
                 components completed after the online execution
                 finishes. Ubora uses memoization to speed up mature
                 executions by replaying network messages exchanged
                 between components. Our systems-level implementation
                 works for a wide range of services, including
                 Hadoop/Yarn, Apache Lucene, the EasyRec Recommendation
                 Engine, and the OpenEphyra question-answering system.
                 Ubora computes answer quality with more mature
                 executions per second than competing approaches that do
                 not use memoization. With Ubora, we show that answer
                 quality is effective at guiding online admission
                 control. While achieving the same answer quality on
                 high-priority queries, our adaptive controller had 55\%
                 higher peak throughput on low-priority queries than a
                 competing controller guided by the rate of timeouts.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Joshi:2017:ERT,
  author =       "Gauri Joshi and Emina Soljanin and Gregory Wornell",
  title =        "Efficient Redundancy Techniques for Latency Reduction
                 in Cloud Systems",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "2",
  pages =        "12:1--12:30",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055281",
  ISSN =         "2376-3639 (print), 2376-3647 (electronic)",
  ISSN-L =       "2376-3639",
  bibdate =      "Thu Jun 15 12:19:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3055281",
  abstract =     "In cloud computing systems, assigning a task to
                 multiple servers and waiting for the earliest copy to
                 finish is an effective method to combat the variability
                 in response time of individual servers and reduce
                 latency. But adding redundancy may result in higher
                 cost of computing resources, as well as an increase in
                 queueing delay due to higher traffic load. This work
                 helps in understanding when and how redundancy gives a
                 cost-efficient reduction in latency. For a general task
                 service time distribution, we compare different
                 redundancy strategies in terms of the number of
                 redundant tasks and the time when they are issued and
                 canceled. We get the insight that the log-concavity of
                 the task service time creates a dichotomy of when
                 adding redundancy helps. If the service time
                 distribution is log-convex (i.e., log of the tail
                 probability is convex), then adding maximum redundancy
                 reduces both latency and cost. And if it is log-concave
                 (i.e., log of the tail probability is concave), then
                 less redundancy, and early cancellation of redundant
                 tasks is more effective. Using these insights, we
                 design a general redundancy strategy that achieves a
                 good latency-cost trade-off for an arbitrary service
                 time distribution. This work also generalizes and
                 extends some results in the analysis of fork-join
                 queues.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Du:2017:SCB,
  author =       "Yuhuan Du and Gustavo {De Veciana}",
  title =        "Scheduling for Cloud-Based Computing Systems to
                 Support Soft Real-Time Applications",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3063713",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:14 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3063713",
  abstract =     "Cloud-based computing infrastructure provides an
                 efficient means to support real-time processing
                 workloads, for example, virtualized base station
                 processing, and collaborative video conferencing. This
                 article addresses resource allocation for a computing
                 system with multiple resources supporting heterogeneous
                 soft real-time applications subject to Quality of
                 Service (QoS) constraints on failures to meet
                 processing deadlines. We develop a general outer bound
                 on the feasible QoS region for non-clairvoyant resource
                 allocation policies and an inner bound for a natural
                 class of policies based on dynamically prioritizing
                 applications' tasks by favoring those with the largest
                 (QoS) deficits. This provides an avenue to study the
                 efficiency of two natural resource allocation policies:
                 (1) priority-based greedy task scheduling for
                 applications with variable workloads and (2)
                 priority-based task selection and optimal scheduling
                 for applications with deterministic workloads. The
                 near-optimality of these simple policies emerges when
                 task processing deadlines are relatively large and/or
                 when the number of compute resources is large. Analysis
                 and simulations show substantial resource savings for
                 such policies over reservation-based designs.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Dong:2017:CAT,
  author =       "Fang Dong and Kui Wu and Venkatesh Srinivasan",
  title =        "Copula Analysis of Temporal Dependence Structure in
                 {Markov} Modulated {Poisson} Process and Its
                 Applications",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3089254",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:14 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3089254",
  abstract =     "The Markov Modulated Poisson Process (MMPP) has been
                 extensively studied in random process theory and widely
                 applied in various applications involving Poisson
                 arrivals whose rate varies following a Markov process.
                 Despite the rich literature on MMPP, very little is
                 known on its intricate temporal dependence structure.
                 No exact solution is available so far to capture the
                 functional temporal dependence of MMPP at the
                 stationary state over slotted times. This article
                 tackles the above challenges with copula analysis. It
                 not only presents a novel analytical framework to
                 capture the temporal dependence of MMPP but also
                 provides the exact copula-based solutions for single
                 MMPP as well as the aggregate of independent MMPP. This
                 theoretical contribution discloses functional
                 dependence structure of MMPP. It also lays the
                 foundation for many applications that rely on the
                 temporal dependence of MMPP for adaptive control or
                 predictive resource provisioning. We demonstrate case
                 studies, with real-world trace data as well as
                 simulation, to illustrate the practical significance of
                 our analytical results.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Ferragut:2017:CVC,
  author =       "Andres Ferragut and Fernando Paganini and Adam
                 Wierman",
  title =        "Controlling the Variability of Capacity Allocations
                 Using Service Deferrals",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "3",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3086506",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:14 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3086506",
  abstract =     "Ensuring predictability is a crucial goal for service
                 systems. Traditionally, research has focused on
                 designing systems that ensure predictable performance
                 for service requests. Motivated by applications in
                 cloud computing and electricity markets, this article
                 focuses on a different form of predictability:
                 predictable allocations of service capacity. The focus
                 of the article is a new model where service capacity
                 can be scaled dynamically and service deferrals
                 (subject to deadline constraints) can be used to
                 control the variability of the active service capacity.
                 Four natural policies for the joint problem of
                 scheduling and managing the active service capacity are
                 considered. For each, the variability of service
                 capacity and the likelihood of deadline misses are
                 derived. Further, the paper illustrates how pricing can
                 be used to provide incentives for jobs to reveal
                 deadlines and thus enable the possibility of service
                 deferral in systems where the flexibility of jobs is
                 not known to the system a priori.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@Article{Li:2017:IPO,
  author =       "Hao Li and Xinhai Xu and Miao Wang and Chao Li and
                 Xiaoguang Ren and Xuejun Yang",
  title =        "Insertion of {PETSc} in the {OpenFOAM} Framework",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "3",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3098821",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:14 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3098821",
  abstract =     "OpenFOAM is a widely used open source framework for
                 simulation in several areas of computational fluid
                 dynamics and engineering. As a partial differential
                 equation (PDE)-based framework, OpenFOAM suffers from a
                 performance bottleneck in solving large-scale sparse
                 linear systems of equations. To address the problem,
                 this article proposes a novel OpenFOAM-PETSc framework
                 by inserting PETSc, a dedicated numerical solving
                 package, into the OpenFOAM to speed up the process of
                 solving linear equation systems. The design of the
                 OpenFOAM-PETSc framework is described, and the
                 implementation of an efficient matrix conversion
                 algorithm is given as a case study. Validation tests on
                 a high-performance computing cluster show that
                 OpenFOAM-PETSc reduces the time of solving PDEs by
                 about 27\% in the lid-driven cavity flow case and by
                 more than 50\% in flow around the cylinder case in
                 comparison with OpenFOAM, without compromising the
                 scalability. In addition, this article also gives a
                 preliminary performance analysis of different numerical
                 solution methods, which may provide guidelines for
                 further optimizations.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(3):17 (September 2017). FastCap / FastEnergy: queuing-model
%%% algorithms for coordinated CPU and memory DVFS management in many-core
%%% servers, simulated under power-cap and energy-saving objectives.
@Article{Liu:2017:FPE,
  author =       "Yanpei Liu and Guilherme Cox and Qingyuan Deng and
                 Stark C. Draper and Ricardo Bianchini",
  title =        "Fast Power and Energy Management for Future Many-Core
                 Systems",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "3",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3086504",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:14 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3086504",
  abstract =     "Future servers will incorporate many active low-power
                 modes for each core and for the main memory subsystem.
                 Though these modes provide flexibility for power and/or
                 energy management via Dynamic Voltage and Frequency
                 Scaling (DVFS), prior work has shown that they must be
                 managed in a coordinated manner. This requirement
                 creates a combinatorial space of possible power mode
                 configurations. As a result, it becomes increasingly
                 challenging to quickly select the configuration that
                 optimizes for both performance and power/energy
                 efficiency. In this article, we propose a novel queuing
                 model for working with the abundant active low-power
                 modes in many-core systems. Based on the queuing model,
                 we derive two fast algorithms that optimize for
                 performance and efficiency using both CPU and memory
                 DVFS. Our first algorithm, called FastCap, maximizes
                 the performance of applications under a full-system
                 power cap, while promoting fairness across
                 applications. Our second algorithm, called FastEnergy,
                 maximizes the full-system energy savings under
                 predefined application performance loss bounds. Both
                 FastCap and FastEnergy operate online and efficiently,
                 using a small set of performance counters as input. To
                 evaluate them, we simulate both algorithms for a
                 many-core server running different types of workloads.
                 Our results show that FastCap achieves better
                 application performance and fairness than prior power
                 capping techniques for the same power budget, whereas
                 FastEnergy conserves more energy than prior energy
                 management techniques for the same performance
                 constraint. FastCap and FastEnergy together demonstrate
                 the applicability of the queuing model for managing the
                 abundant active low-power modes in many-core systems.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):18 (December 2017). Three-tier cloud market model (users /
%%% SaaS / PaaS-IaaS) analyzing price competition, congestion, and the
%%% price of anarchy of the cloud marketplace.
@Article{Anselmi:2017:EC,
  author =       "Jonatha Anselmi and Danilo Ardagna and John C. S. Lui
                 and Adam Wierman and Yunjian Xu and Zichao Yang",
  title =        "The Economics of the Cloud",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3086574",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3086574",
  abstract =     "This article proposes a model to study the interaction
                 of price competition and congestion in the cloud
                 computing marketplace. Specifically, we propose a
                 three-tier market model that captures a marketplace
                 with users purchasing services from
                 Software-as-a-Service (SaaS) providers, which in turn
                 purchase computing resources from either
                 Provider-as-a-Service (PaaS) or
                 Infrastructure-as-a-Service (IaaS) providers. Within
                 each level, we define and characterize market
                 equilibria. Further, we use these characterizations to
                 understand the relative profitability of SaaSs and
                 PaaSs/IaaSs and to understand the impact of price
                 competition on the user experienced performance, that
                 is, the ``price of anarchy'' of the cloud marketplace.
                 Our results highlight that both of these depend
                 fundamentally on the degree to which congestion results
                 from shared or dedicated resources in the cloud.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):19 (December 2017). Predictive bandwidth auto-scaling and
%%% sparse content replication for cloud-hosted VoD storage, including an
%%% iterative $L_1$-norm penalized optimization procedure.
%%% Fix: the abstract's "L 1 -norm" was extraction-garbled math; restored
%%% as braced inline math {$L_1$}-norm.
@Article{Niu:2017:RAS,
  author =       "Di Niu and Hong Xu and Baochun Li",
  title =        "Resource Auto-Scaling and Sparse Content Replication
                 for Video Storage Systems",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3079045",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3079045",
  abstract =     "Many video-on-demand (VoD) providers are relying on
                 public cloud providers for video storage, access, and
                 streaming services. In this article, we investigate how
                 a VoD provider may make optimal bandwidth reservations
                 from a cloud service provider to guarantee the
                 streaming performance while paying for the bandwidth,
                 storage, and transfer costs. We propose a predictive
                 resource auto-scaling system that dynamically books the
                 minimum amount of bandwidth resources from multiple
                 servers in a cloud storage system to allow the VoD
                 provider to match its short-term demand projections. We
                 exploit the anti-correlation between the demands of
                 different videos for statistical multiplexing to hedge
                 the risk of under-provisioning. The optimal load
                 direction from video channels to cloud servers without
                 replication constraints is derived with provable
                 performance. We further study the joint load direction
                 and sparse content placement problem that aims to
                 reduce bandwidth reservation cost under sparse content
                 replication requirements. We propose several
                 algorithms, and especially an iterative {$L_1$}-norm
                 penalized optimization procedure, to efficiently solve
                 the problem while effectively limiting the video
                 migration overhead. The proposed system is backed up by
                 a demand predictor that forecasts the expectation,
                 volatility, and correlation of the streaming traffic
                 associated with different videos based on statistical
                 learning. Extensive simulations are conducted to
                 evaluate our proposed algorithms, driven by the
                 real-world workload traces collected from a commercial
                 VoD system.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):20 (December 2017). Colored Petri net behavioral model of
%%% IEEE 802.15.4 beacon-enabled mode, validated with NS2 simulations and a
%%% telosB testbed.
@Article{Alves:2017:BMI,
  author =       "Renan C. A. Alves and C{\'\i}ntia B. Margi",
  title =        "Behavioral Model of {IEEE 802.15.4} Beacon-Enabled
                 Mode Based on Colored {Petri} Net",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "20:1--20:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3115389",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3115389",
  abstract =     "The IEEE 802.15.4 standard is widely employed in
                 power-constrained scenarios, such as Wireless Sensor
                 Networks deployments. Therefore, modeling this standard
                 is useful to predict network performance and fine-tune
                 parameter settings. Previous work rely on determining
                 all reachable network states, usually by a Markov
                 chain, which is often complex and error prone. In
                 contrast, we provide a novel behavioral approach to the
                 IEEE 802.15.4 modeling, which covers the literature gap
                 in assessing all metrics of interest, modeling
                 asymmetric traffic condition and fully comprising the
                 beacon-enabled mode. In addition, it is possible to
                 test different values for the parameters of the
                 standard, such as aMaxFrameRetries, macMaxCSMABackoffs,
                 initialCW, and aUnitBackoffPeriod. The model was
                 validated by NS2 simulations and by a testbed composed
                 of telosB motes.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):21 (December 2017). Cache replacement policy that accounts
%%% for memory-vs-disk access-time differences in hierarchical caches;
%%% optimal under the independent reference model.
@Article{Neglia:2017:ATA,
  author =       "Giovanni Neglia and Damiano Carra and Mingdong Feng
                 and Vaishnav Janardhan and Pietro Michiardi and Dimitra
                 Tsigkari",
  title =        "Access-Time-Aware Cache Algorithms",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "21:1--21:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3149001",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3149001",
  abstract =     "Most of the caching algorithms are oblivious to
                 requests' timescale, but caching systems are capacity
                 constrained and, in practical cases, the hit rate may
                 be limited by the cache's impossibility to serve
                 requests fast enough. In particular, the hard-disk
                 access time can be the key factor capping cache
                 performance. In this article, we present a new cache
                 replacement policy that takes advantage of a
                 hierarchical caching architecture, and in particular of
                 access-time difference between memory and disk. Our
                 policy is optimal when requests follow the independent
                 reference model and significantly reduces the hard-disk
                 load, as shown also by our realistic, trace-driven
                 evaluation. Moreover, we show that our policy can be
                 considered in a more general context, since it can be
                 easily adapted to minimize any retrieval cost, as far
                 as costs add over cache misses.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):22 (December 2017). General model of how duty cycling
%%% alters pairwise contact and intercontact times in opportunistic
%%% networks, validated against two real human-mobility traces.
@Article{Biondi:2017:WYL,
  author =       "Elisabetta Biondi and Chiara Boldrini and Andrea
                 Passarella and Marco Conti",
  title =        "What You Lose When You Snooze: How Duty Cycling
                 Impacts on the Contact Process in Opportunistic
                 Networks",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "22:1--22:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3149007",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3149007",
  abstract =     "In opportunistic networks, putting devices in
                 energy-saving mode is crucial to preserve their
                 battery, and hence to increase the lifetime of the
                 network and foster user participation. A popular
                 strategy for energy saving is duty cycling. However,
                 when in energy-saving mode, users cannot communicate
                 with each other. The side effects of duty cycling are
                 twofold. On the one hand, duty cycling may reduce the
                 number of usable contacts for delivering messages,
                 increasing intercontact times, and delays. On the other
                 hand, duty cycling may break long contacts into smaller
                 contacts, thus also reducing the capacity of the
                 opportunistic network. Despite the potential serious
                 effects, the role played by duty cycling in
                 opportunistic networks has been often neglected in the
                 literature. In order to fill this gap, in this article,
                 we propose a general model for deriving the pairwise
                 contact and intercontact times measured when a duty
                 cycling policy is superimposed on the original
                 encounter process determined only by node mobility. The
                 model we propose is general, i.e., not bound to a
                 specific distribution of contact and intercontact
                 times, and very accurate, as we show exploiting two
                 traces of real human mobility for validation. Using
                 this model, we derive several interesting results about
                 the properties of measured contact and intercontact
                 times with duty cycling: their distribution, how their
                 coefficient of variation changes depending on the duty
                 cycle value, and how the duty cycling affects the
                 capacity and delay of an opportunistic network. The
                 applicability of these results is broad, ranging from
                 performance models for opportunistic networks that
                 factor in the duty cycling effect, to the optimisation
                 of the duty cycle to meet a certain target
                 performance.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 2(4):23 (December 2017). Editorial: annual list of reviewers
%%% (no abstract).
@Article{Abouzeid:2017:LR,
  author =       "Alhussein Abouzeid",
  title =        "List of Reviewers",
  journal =      j-TOMPECS,
  volume =       "2",
  number =       "4",
  pages =        "23:1--23:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162084",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3162084",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(1):1 (February 2018). Nash-bargaining-based request
%%% scheduling with per-user response-time guarantees in cloud computing;
%%% computational algorithm (CA) and duality-based iterative algorithm (IA).
%%% Fix: removed spurious spaces inside the parenthesized abbreviations
%%% "( CA )" / "( IA )" (text-extraction artifacts in the abstract).
@Article{Liu:2018:BGB,
  author =       "Chubo Liu and Kenli Li and Zhuo Tang and Keqin Li",
  title =        "Bargaining Game-Based Scheduling for Performance
                 Guarantees in Cloud Computing",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3141233",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3141233",
  abstract =     "In this article, we focus on request scheduling with
                 performance guarantees of all users in cloud computing.
                 Each cloud user submits requests with average response
                 time requirement, and the cloud provider tries to find
                 a scheduling scheme, i.e., allocating user requests to
                 limited servers, such that the average response times
                 of all cloud users can be guaranteed. We formulate the
                 considered scenario into a cooperative game among
                 multiple users and try to find a Nash bargaining
                 solution (NBS), which can simultaneously satisfy all
                 users' performance demands. We first prove the
                 existence of NBS and then analyze its computation.
                 Specifically, for the situation when all allocating
                 substreams are strictly positive, we propose a
                 computational algorithm (CA), which can find the NBS
                 very efficiently. For the more general case, we propose
                 an iterative algorithm (IA), which is based on
                 duality theory. The convergence of our proposed IA
                 algorithm is also analyzed. Finally, we conduct some
                 numerical calculations. The experimental results show
                 that our IA algorithm can find an appropriate
                 scheduling strategy and converges to a stable state
                 very quickly.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(1):2 (February 2018). Adaptive crowdsourcing algorithms for
%%% selecting the top-quality item from noisy, biased worker evaluations.
@Article{Nordio:2018:STQ,
  author =       "Alessandro Nordio and Alberto Tarable and Emilio
                 Leonardi and Marco Ajmone Marsan",
  title =        "Selecting the Top-Quality Item Through Crowd Scoring",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157736",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3157736",
  abstract =     "We investigate crowdsourcing algorithms for finding
                 the top-quality item within a large collection of
                 objects with unknown intrinsic quality values. This is
                 an important problem with many relevant applications,
                 such as in networked recommendation systems. The core
                 of the algorithms is that objects are distributed to
                 crowd workers, who return a noisy and biased
                 evaluation. All received evaluations are then combined
                 to identify the top-quality object. We first present a
                 simple probabilistic model for the system under
                 investigation. Then we devise and study a class of
                 efficient adaptive algorithms to assign in an effective
                 way objects to workers. We compare the performance of
                 several algorithms, which correspond to different
                 choices of the design parameters/metrics. In the
                 simulations, we show that some of the algorithms
                 achieve near optimal performance for a suitable setting
                 of the system parameters.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(1):3 (February 2018). Mean-field analysis showing coding
%%% strictly outperforms replication (for equal per-file storage) in large
%%% cloud storage systems at all traffic loads.
@Article{Li:2018:MFA,
  author =       "Bin Li and Aditya Ramamoorthy and R. Srikant",
  title =        "Mean-Field Analysis of Coding Versus Replication in
                 Large Data Storage Systems",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3159172",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3159172",
  abstract =     "We study cloud storage systems with a very large
                 number of files stored in a very large number of
                 servers. In such systems, files are either replicated
                 or coded to ensure reliability, i.e., to guarantee file
                 recovery from server failures. This redundancy in
                 storage can further be exploited to improve system
                 performance (mean file-access delay) through
                 appropriate load-balancing (routing) schemes. However,
                 it is unclear whether coding or replication is better
                 from a system performance perspective since the
                 corresponding queueing analysis of such systems is, in
                 general, quite difficult except for the trivial case
                 when the system load asymptotically tends to zero.
                 Here, we study the more difficult case where the system
                 load is not asymptotically zero. Using the fact that
                 the system size is large, we obtain a mean-field limit
                 for the steady-state distribution of the number of file
                 access requests waiting at each server. We then use the
                 mean-field limit to show that, for a given storage
                 capacity per file, coding strictly outperforms
                 replication at all traffic loads while improving
                 reliability. Further, the factor by which the
                 performance improves in the heavy traffic is at least
                 as large as in the light-traffic case. Finally, we
                 validate these results through extensive simulations.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(1):4 (February 2018). EQ: a QoE-centric VoIP rate control
%%% mechanism built on a log-like bitrate/QoE relationship confirmed via a
%%% 60-participant Mechanical Turk study with AMR-WB.
%%% Fix: restored two em-dashes lost in text extraction
%%% ("networking---how", "the name---exponential quantization"); the plain
%%% hyphens made the sentences unreadable.
@Article{Chu:2018:EQC,
  author =       "Cing-Yu Chu and Shannon Chen and Yu-Chuan Yen and
                 Su-Ling Yeh and Hao-Hua Chu and Polly Huang",
  title =        "{EQ}: A {QoE}-Centric Rate Control Mechanism for
                 {VoIP} Calls",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3170430",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3170430",
  abstract =     "The rising popularity of data calls and the slowed
                 global economy have posed a challenge to voice data
                 networking---how to satisfy the growing user demand for
                 VoIP calls under limited network resources. In a
                 bandwidth-constrained network in particular, raising
                 the bitrate for one call implies a lowered bitrate for
                 another. Therefore, knowing whether it is worthwhile to
                 raise one call's bitrate while other users might
                 complain is crucial to the design of a user-centric
                 rate control mechanism. To this end, previous work
                 (Chen et al. 2012) has reported a log-like relationship
                 between bitrate and user experience (i.e., QoE) in
                 Skype calls. To show that the relationship extends to
                 more general VoIP calls, we conduct a 60-participant
                 user study via the Amazon Mechanical Turk crowdsourcing
                 platform and reaffirm the log-like relationship between
                 the call bitrate and user experience in widely used
                 AMR-WB. The relationship gives rise to a simple and
                 practical rate control scheme that exponentially
                 quantizes the steps of rate change, therefore the
                 name---exponential quantization (EQ). To support that EQ
                 is effective in addressing the challenge, we show
                 through a formal analysis that the resulting bandwidth
                 allocation is optimal in both the overall QoE and the
                 number of calls served. To relate EQ to existing rate
                 control mechanisms, we show in a simulation study that
                 the bitrates of calls administered by EQ converge over
                 time and outperform those controlled by a (na{\"\i}ve)
                 greedy mechanism and the mechanism implemented in
                 Skype.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(1):5 (February 2018). Truthful online auction (compact
%%% exponential LPs + primal-dual posted pricing) for cloud resource
%%% provisioning under a mandatory emergency demand response program.
@Article{Zhou:2018:OED,
  author =       "Ruiting Zhou and Zongpeng Li and Chuan Wu",
  title =        "An Online Emergency Demand Response Mechanism for
                 Cloud Computing",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177755",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:15 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3177755",
  abstract =     "This article studies emergency demand response (EDR)
                 mechanisms from a data center perspective, where a
                 cloud participates in a mandatory EDR program while
                 receiving computing job bids from cloud users in an
                 online fashion. We target a realistic EDR mechanism
                 where (i) the cloud provider dynamically packs
                 different types of resources on servers into requested
                 VMs and computes job schedules to meet users'
                 requirements, (ii) the power consumption of servers in
                 the cloud is limited by the grid through the EDR
                 program, and (iii) the operation cost of the cloud is
                 considered in the calculation of social welfare,
                 measured by an electricity cost that consists of both
                 volume charge and peak charge. We propose an online
                 auction for dynamic cloud resource provisioning that is
                 under the control of the EDR program, runs in
                 polynomial time, achieves truthfulness, and
                 close-to-optimal social welfare for the cloud
                 ecosystem. In the design of the online auction, we
                 first propose a new framework, compact exponential LPs,
                 to handle job scheduling constraints in the time
                 domain. We then develop a posted pricing auction
                 framework toward the truthful online auction design,
                 which leverages the classic primal-dual technique for
                 approximation algorithm design. We evaluate our online
                 auctions through both theoretical analysis and
                 empirical studies driven by real-world traces.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(2) (April 2018). Editorial introduction to the ICPE 2017
%%% special issue (no abstract).
%%% NOTE(review): articleno "6e" does not match pages "6:1--6:??", and the
%%% regular article Wang:2018:EAA in the same issue also carries pages
%%% "6:1--6:??" with articleno "6". The "e" suffix presumably distinguishes
%%% the editorial --- confirm the page/article numbering against the ACM DL.
@Article{Koziolek:2018:SIS,
  author =       "Anne Koziolek and Evgenia Smirni",
  title =        "Special Issue: Selected Paper From the {8th {ACM\slash
                 SPEC} International Conference on Performance
                 Engineering (ICPE 2017)}",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186329",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3186329",
  acknowledgement = ack-nhfb,
  articleno =    "6e",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(2):6 (April 2018). Empirical study of Amazon EC2 spot
%%% instance features and predictors for cost-effective dynamic spot
%%% procurement (Memcached and batch-workload case studies).
@Article{Wang:2018:EAA,
  author =       "Cheng Wang and Qianlin Liang and Bhuvan Urgaonkar",
  title =        "An Empirical Analysis of {Amazon EC2} Spot Instance
                 Features Affecting Cost-Effective Resource
                 Procurement",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164538",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3164538",
  abstract =     "Many cost-conscious public cloud workloads
                 (``tenants'') are turning to Amazon EC2's spot
                 instances because, on average, these instances offer
                 significantly lower prices (up to 10 times lower) than
                 on-demand and reserved instances of comparable
                 advertised resource capacities. To use spot instances
                 effectively, a tenant must carefully weigh the lower
                 costs of these instances against their poorer
                 availability. Toward this, we empirically study four
                 features of EC2 spot instance operation that a
                 cost-conscious tenant may find useful to model. Using
                 extensive evaluation based on historical spot instance
                 data, we show shortcomings in the state-of-the-art
                 modeling of these features that we overcome. As an
                 extension to our prior work, we conduct data analysis
                 on a rich dataset of the latest spot price traces
                 collected from a variety of EC2 spot markets. Our
                 analysis reveals many novel properties of spot instance
                 operation, some of which offer predictive value whereas
                 others do not. Using these insights, we design
                 predictors for our features that offer a balance
                 between computational efficiency (allowing for online
                 resource procurement) and cost efficacy. We explore
                 ``case studies'' wherein we implement prototypes of
                 dynamic spot instance procurement advised by our
                 predictors for two types of workloads. Compared to the
                 state of the art, our approach achieves (i) comparable
                 cost but much better performance (fewer bid failures)
                 for a latency-sensitive in-memory Memcached cache and
                 (ii) an additional 18\% cost savings with comparable
                 (if not better than) performance for a delay-tolerant
                 batch workload.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(2), article 7, April 2018; DOI 10.1145/3164536.
%%% ``??'' in pages = final page unknown at cataloguing time.
@Article{Cassell:2018:DPM,
  author =       "Benjamin Cassell and Tyler Szepesi and Jim Summers and
                 Tim Brecht and Derek Eager and Bernard Wong",
  title =        "Disk Prefetching Mechanisms for Increasing {HTTP}
                 Streaming Video Server Throughput",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "2",
  pages =        "7:1--7:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164536",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3164536",
  abstract =     "Most video streaming traffic is delivered over HTTP
                 using standard web servers. While traditional web
                 server workloads consist of requests that are primarily
                 for small files that can be serviced from the file
                 system cache, HTTP video streaming workloads often
                 service a long tail of large infrequently requested
                 videos. As a result, optimizing disk accesses is
                 critical to obtaining good server throughput. In this
                 article we explore serialized, aggressive disk
                 prefetching, a technique that can be used to improve
                 the throughput of HTTP streaming video web servers. We
                 identify how serialization and aggressive prefetching
                 affect performance, and, based on our findings, we
                 construct and evaluate Libception, an application-level
                 shim library that implements both techniques. By
                 dynamically linking against Libception at runtime,
                 applications are able to transparently obtain benefits
                 from serialization and aggressive prefetching without
                 needing to change their source code. In contrast to
                 other approaches that modify applications, make kernel
                 changes, or attempt to optimize kernel tuning,
                 Libception provides a portable and relatively simple
                 system in which techniques for optimizing I/O in HTTP
                 video streaming servers can be implemented and
                 evaluated. We empirically evaluate the efficacy of
                 serialization and aggressive prefetching both with and
                 without Libception, using three web servers (Apache,
                 nginx, and the userver) running on two operating
                 systems (FreeBSD and Linux). We find that, by using
                 Libception, we can improve streaming throughput for all
                 three web servers by at least a factor of 2 on FreeBSD
                 and a factor of 2.5 on Linux. Additionally, we find
                 that with significant tuning of Linux kernel
                 parameters, we can achieve similar performance to
                 Libception by globally modifying Linux's disk prefetch
                 behaviour. Finally, we demonstrate Libception's ability
                 to reduce the completion time of a microbenchmark
                 involving two applications competing for disk
                 resources.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(2), article 8, April 2018; DOI 10.1145/3164537.
@Article{Ilyushkin:2018:EPE,
  author =       "Alexey Ilyushkin and Ahmed Ali-Eldin and Nikolas
                 Herbst and Andr{\'e} Bauer and Alessandro V.
                 Papadopoulos and Dick Epema and Alexandru Iosup",
  title =        "An Experimental Performance Evaluation of Autoscalers
                 for Complex Workflows",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "2",
  pages =        "8:1--8:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164537",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3164537",
  abstract =     "Elasticity is one of the main features of cloud
                 computing allowing customers to scale their resources
                 based on the workload. Many autoscalers have been
                 proposed in the past decade to decide on behalf of
                 cloud customers when and how to provision resources to
                 a cloud application based on the workload utilizing
                 cloud elasticity features. However, in prior work, when
                 a new policy is proposed, it is seldom compared to the
                 state-of-the-art, and is often compared only to static
                 provisioning using a predefined quality of service
                 target. This reduces the ability of cloud customers and
                 of cloud operators to choose and deploy an autoscaling
                 policy, as there is seldom enough analysis on the
                 performance of the autoscalers in different operating
                 conditions and with different applications. In our
                 work, we conduct an experimental performance evaluation
                 of autoscaling policies, using as application model
                 workflows, a popular formalism for automating resource
                 management for applications with well-defined yet
                 complex structures. We present a detailed comparative
                 study of general state-of-the-art autoscaling policies,
                 along with two new workflow-specific policies. To
                 understand the performance differences between the
                 seven policies, we conduct various experiments and
                 compare their performance in both pairwise and group
                 comparisons. We report both individual and aggregated
                 metrics. As many workflows have deadline requirements
                 on the tasks, we study the effect of autoscaling on
                 workflow deadlines. Additionally, we look into the
                 effect of autoscaling on the accounted and hourly based
                 charged costs, and we evaluate performance variability
                 caused by the autoscaler selection for each group of
                 workflow sizes. Our results highlight the trade-offs
                 between the suggested policies, how they can impact
                 meeting the deadlines, and how they perform in
                 different operating conditions, thus enabling a better
                 understanding of the current state-of-the-art.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(2), article 9, April 2018; DOI 10.1145/3177754.
@Article{Khan:2018:RAE,
  author =       "Kashif Nizam Khan and Mikael Hirki and Tapio Niemi and
                 Jukka K. Nurminen and Zhonghong Ou",
  title =        "{RAPL} in Action: Experiences in Using {RAPL} for
                 Power Measurements",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "2",
  pages =        "9:1--9:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177754",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3177754",
  abstract =     "To improve energy efficiency and comply with the power
                 budgets, it is important to be able to measure the
                 power consumption of cloud computing servers. Intel's
                 Running Average Power Limit (RAPL) interface is a
                 powerful tool for this purpose. RAPL provides power
                 limiting features and accurate energy readings for CPUs
                 and DRAM, which are easily accessible through different
                 interfaces on large distributed computing systems.
                 Since its introduction, RAPL has been used extensively
                 in power measurement and modeling. However, the
                 advantages and disadvantages of RAPL have not been well
                 investigated yet. To fill this gap, we conduct a series
                 of experiments to disclose the underlying strengths and
                 weaknesses of the RAPL interface by using both
                 customized microbenchmarks and three well-known
                 application level benchmarks: Stream, Stress-ng, and
                 ParFullCMS. Moreover, to make the analysis as realistic
                 as possible, we leverage two production-level power
                 measurement datasets from the Taito, a supercomputing
                 cluster of the Finnish Center of Scientific Computing
                 and also replicate our experiments on Amazon EC2. Our
                 results illustrate different aspects of RAPL and
                 document the findings through comprehensive analysis.
                 Our observations reveal that RAPL readings are highly
                 correlated with plug power, promisingly accurate
                 enough, and have negligible performance overhead.
                 Experimental results suggest RAPL can be a very useful
                 tool to measure and monitor the energy consumption of
                 servers without deploying any complex power meters. We
                 also show that there are still some open issues, such
                 as driver support, non-atomicity of register updates,
                 and unpredictable timings that might weaken the
                 usability of RAPL in certain scenarios. For such
                 scenarios, we pinpoint solutions and workarounds.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(3), article 10, August 2018; DOI 10.1145/3200479.
%%% Compound surname braced ({De Veciana}) so BibTeX keeps it intact.
@Article{Du:2018:EOL,
  author =       "Yuhuan Du and Gustavo {De Veciana}",
  title =        "Efficiency and Optimality of Largest Deficit First
                 Prioritization: Dynamic User Prioritization for Soft
                 Real-Time Applications",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "3",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3200479",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3200479",
  abstract =     "An increasing number of real-time applications with
                 compute and/or communication deadlines are being
                 supported on a shared infrastructure. Such applications
                 can often tolerate occasional deadline violations
                 without substantially impacting their Quality of
                 Service (QoS). This article explores the efficient
                 allocation of shared resources to satisfy such QoS
                 requirements. We study a simple framework which
                 decouples this problem into two subproblems: (1)
                 dynamic prioritization of users based on arbitrary
                 functions of their deficits (difference of achieved vs.
                 required QoS), and (2) priority-based resource
                 allocation on the underlying compute fabric. In this
                 article, we shall assume the solution to the latter is
                 fixed, e.g., as realized in the task prioritization
                 capabilities of current hardware/software, and focus on
                 compatible solutions to the former. We first
                 characterize the set of feasible QoS requirements and
                 show the optimality of max weight-like prioritization.
                 We then consider simple weighted Largest Deficit First
                 (w-LDF) prioritization policies, where users with
                 higher weighted QoS deficits are given higher priority.
                 The article gives an inner bound for the feasible set
                 under w-LDF policies, and, under an additional
                 monotonicity assumption, characterizes its geometry
                 leading to a sufficient condition for optimality.
                 Additional insights on the efficiency ratio of w-LDF
                 policies, the optimality of hierarchical-LDF, and
                 characterization of clustering of failures are also
                 discussed.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(3), article 11, August 2018; DOI 10.1145/3199675.
@Article{Chen:2018:CEO,
  author =       "Shutong Chen and Zhi Zhou and Fangming Liu and
                 Zongpeng Li and Shaolei Ren",
  title =        "{CloudHeat}: An Efficient Online Market Mechanism for
                 Datacenter Heat Harvesting",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "3",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3199675",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3199675",
  abstract =     "Datacenters are major energy consumers and dissipate
                 an enormous amount of waste heat. Simple outdoor
                 discharging of datacenter heat is energy-consuming and
                 environmentally unfriendly. By harvesting datacenter
                 waste heat and selling to the district heating system
                 (DHS), both energy cost compensation and environment
                 protection can be achieved. To realize such benefits in
                 practice, an efficient market mechanism is required to
                 incentivize the participation of datacenters. This work
                 proposes CloudHeat, an online reverse auction mechanism
                 for the DHS to solicit heat bids from datacenters. To
                 minimize long-term social operational cost of the DHS
                 and the datacenters, we apply a RFHC approach for
                 decomposing the long-term problem into a series of
                 one-round auctions, guaranteeing a small loss in
                 competitive ratio. The one-round optimization is still
                 NP-hard, and we employ a randomized auction framework
                 to simultaneously guarantee truthfulness, polynomial
                 running time, and an approximation ratio of 2. The
                 performance of CloudHeat is validated through
                 theoretical analysis and trace-driven simulation
                 studies.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(3), article 12, August 2018; DOI 10.1145/3197656.
%%% Single-author entry.
@Article{Varki:2018:GGP,
  author =       "Elizabeth Varki",
  title =        "{GPSonflow}: Geographic Positioning of Storage for
                 Optimal Nice Flow",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "3",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3197656",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3197656",
  abstract =     "This article evaluates the maximum data flow from a
                 sender to a receiver via the internet when all
                 transmissions are scheduled for early morning hours.
                 The significance of early morning hours is that
                 internet congestion is low while users sleep. When the
                 sender and receiver lie in proximal time zones, a
                 direct transmission from sender to receiver can be
                 scheduled for early morning hours. When the sender and
                 receiver are separated by several time zones such that
                 their sleep times are non-overlapping, data can still
                 be transmitted during early morning hours with an
                 indirect store-and-forward transfer. The data are
                 transmitted from the sender to intermediate end
                 networks or data centers that serve as storage hops en
                 route to receiver. The storage hops are placed in zones
                 that are time proximal to the sender or the receiver so
                 that all transmissions to and from storage hops occur
                 during low-congestion early morning hours. This article
                 finds the optimal locations and bandwidth distributions
                 of storage hops for maximum nice internet flow from a
                 sender to a receiver.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(3), article 13, August 2018; DOI 10.1145/3200863.
@Article{Ray:2018:SSC,
  author =       "Avik Ray and Sujay Sanghavi and Sanjay Shakkottai",
  title =        "Searching for a Single Community in a Graph",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "3",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3200863",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3200863",
  abstract =     "In standard graph clustering/community detection, one
                 is interested in partitioning the graph into more
                 densely connected subsets of nodes. In contrast, the
                 search problem of this article aims to only find the
                 nodes in a single such community, the target, out of
                 the many communities that may exist. To do so, we are
                 given suitable side information about the target; for
                 example, a very small number of nodes from the target
                 are labeled as such. We consider a general yet simple
                 notion of side information: all nodes are assumed to
                 have random weights, with nodes in the target having
                 higher weights on average. Given these weights and the
                 graph, we develop a variant of the method of moments
                 that identifies nodes in the target more reliably, and
                 with lower computation, than generic community
                 detection methods that do not use side information and
                 partition the entire graph. Our empirical results show
                 significant gains in runtime, and also gains in
                 accuracy over other graph clustering algorithms.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(3), article 14, August 2018; DOI 10.1145/3229049.
%%% Fix: inserted the missing space after the comma in the abstract
%%% ("applications,such as" --> "applications, such as").
@Article{Malik:2018:SAL,
  author =       "Maria Malik and Setareh Rafatirad and Houman
                 Homayoun",
  title =        "System and Architecture Level Characterization of Big
                 Data Applications on Big and Little Core Server
                 Architectures",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "3",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3229049",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3229049",
  abstract =     "The rapid growth in data yields challenges to process
                 data efficiently using current high-performance server
                 architectures such as big Xeon cores. Furthermore,
                 physical design constraints, such as power and density,
                 have become the dominant limiting factor for scaling
                 out servers. Low-power embedded cores in servers such
                 as little Atom have emerged as a promising solution to
                 enhance energy-efficiency to address these challenges.
                 Therefore, the question of whether to process the big
                 data applications on big Xeon- or Little Atom-based
                 servers becomes important. In this work, through
                 methodical investigation of power and performance
                 measurements, and comprehensive application-level,
                 system-level, and micro-architectural level analysis,
                 we characterize dominant big data applications on big
                 Xeon- and little Atom-based server architectures. The
                 characterization results across a wide range of
                 real-world big data applications, and various software
                 stacks demonstrate how the choice of big- versus
                 little-core-based server for energy-efficiency is
                 significantly influenced by the size of data,
                 performance constraints, and presence of accelerator.
                 In addition, we analyze processor resource utilization
                 of this important class of applications, such as memory
                 footprints, CPU utilization, and disk bandwidth, to
                 understand their run-time behavior. Furthermore, we
                 perform micro-architecture-level analysis to highlight
                 where improvement is needed in big- and little-core
                 microarchitectures to address their performance
                 bottlenecks.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(4), article 15, September 2018; DOI 10.1145/3232076.
@Article{Li:2018:MFG,
  author =       "Jian Li and Bainan Xia and Xinbo Geng and Hao Ming and
                 Srinivas Shakkottai and Vijay Subramanian and Le Xie",
  title =        "Mean Field Games in Nudge Systems for Societal
                 Networks",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3232076",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3232076",
  abstract =     "We consider the general problem of resource sharing in
                 societal networks, consisting of interconnected
                 communication, transportation, energy, and other
                 networks important to the functioning of society.
                 Participants in such network need to take decisions
                 daily, both on the quantity of resources to use as well
                 as the periods of usage. With this in mind, we discuss
                 the problem of incentivizing users to behave in such a
                 way that society as a whole benefits. To perceive
                 societal level impact, such incentives may take the
                 form of rewarding users with lottery tickets based on
                 good behavior and periodically conducting a lottery to
                 translate these tickets into real rewards. We will pose
                 the user decision problem as a mean field game and the
                 incentives question as one of trying to select a good
                 mean field equilibrium (MFE). In such a framework, each
                 agent (a participant in the societal network) takes a
                 decision based on an assumed distribution of actions of
                 his/her competitors and the incentives provided by the
                 social planner. The system is said to be at MFE if the
                 agent's action is a sample drawn from the assumed
                 distribution. We will show the existence of such an MFE
                 under general settings, and also illustrate how to
                 choose an attractive equilibrium using as an example
                 demand-response in the (smart) electricity network.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(4), article 16, September 2018; DOI 10.1145/3232161.
@Article{Pajevic:2018:EPC,
  author =       "Ljubica Pajevic and Viktoria Fodor and Gunnar
                 Karlsson",
  title =        "Ensuring Persistent Content in Opportunistic Networks
                 via Stochastic Stability Analysis",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3232161",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3232161",
  abstract =     "The emerging device-to-device communication solutions
                 and the abundance of mobile applications and services
                 make opportunistic networking not only a feasible
                 solution but also an important component of future
                 wireless networks. Specifically, the distribution of
                 locally relevant content could be based on the
                 community of mobile users visiting an area, if
                 long-term content survival can be ensured this way. In
                 this article, we establish the conditions of content
                 survival in such opportunistic networks, considering
                 the user mobility patterns, as well as the time users
                 keep forwarding the content, as the controllable system
                 parameter. We model the content spreading with an
                 epidemic process, and derive a stochastic differential
                 equations based approximation. By means of stability
                 analysis, we determine the necessary user contribution
                 to ensure content survival. We show that the required
                 contribution from the users depends significantly on
                 the size of the population, that users need to
                 redistribute content only in a short period within
                 their stay, and that they can decrease their
                 contribution significantly in crowded areas. Hence,
                 with the appropriate control of the system parameters,
                 opportunistic content sharing can be both reliable and
                 sustainable.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS 3(4), article 17, September 2018; DOI 10.1145/3233180.
@Article{Wang:2018:QMS,
  author =       "Weikun Wang and Giuliano Casale and Ajay Kattepur and
                 Manoj K. Nambiar",
  title =        "{QMLE}: A Methodology for Statistical Inference of
                 Service Demands from Queueing Data",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233180",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3233180",
  abstract =     "Estimating the demands placed by services on physical
                 resources is an essential step for the definition of
                 performance models. For example, scalability analysis
                 relies on these parameters to predict queueing delays
                 under increasing loads. In this article, we investigate
                 maximum likelihood (ML) estimators for demands at
                 load-independent and load-dependent resources in
                 systems with parallelism constraints. We define a
                 likelihood function based on state measurements and
                 derive necessary conditions for its maximization. We
                 then obtain novel estimators that accurately and
                 inexpensively obtain service demands using only
                 aggregate state data. With our approach, and also
                 thanks to approximation methods for computing marginal
                 and joint distributions for the load-dependent case,
                 confidence intervals can be rigorously derived,
                 explicitly taking into account both topology and
                 concurrency levels of the services. Our estimators and
                 their confidence intervals are validated against
                 simulations and real system measurements for two
                 multi-tier applications, showing high accuracy also in
                 models with load-dependent resources.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v3n4, article 18 (September 2018): empirical comparison of a
%%% scale-out cluster of mobile-class ARM SoCs (ScaleSoC) against
%%% scale-up server-class ARM SoCs and discrete GPGPUs on server workloads.
@Article{Azimi:2018:SVS,
  author =       "Reza Azimi and Tyler Fox and Wendy Gonzalez and
                 Sherief Reda",
  title =        "Scale-Out vs Scale-Up: A Study of {ARM}-based {SoCs}
                 on Server-Class Workloads",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "18:1--18:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3232162",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3232162",
  abstract =     "ARM 64-bit processing has generated enthusiasm to
                 develop ARM-based servers that are targeted for both
                 data centers and supercomputers. In addition to the
                 server-class components and hardware advancements, the
                 ARM software environment has grown substantially over
                 the past decade. Major development ecosystems and
                 libraries have been ported and optimized to run on ARM,
                 making ARM suitable for server-class workloads. There
                 are two trends in available ARM SoCs: mobile-class ARM
                 SoCs that rely on the heterogeneous integration of a
                 mix of CPU cores, GPGPU streaming multiprocessors
                 (SMs), and other accelerators, and the server-class
                 SoCs that instead rely on integrating a larger number
                 of CPU cores with no GPGPU support and a number of IO
                 accelerators. For scaling the number of processing
                 cores, there are two different paradigms: mobile-class
                 SoCs that use scale-out architecture in the form of a
                 cluster of simpler systems connected over a network,
                 and server-class ARM SoCs that use the scale-up
                 solution and leverage symmetric multiprocessing to pack
                 a large number of cores on the chip. In this article,
                 we present ScaleSoC cluster, which is a scale-out
                 solution based on mobile class ARM SoCs. ScaleSoC
                 leverages fast network connectivity and GPGPU
                 acceleration to improve performance and energy
                 efficiency compared to previous ARM scale-out clusters.
                 We consider a wide range of modern server-class
                 parallel workloads to study both scaling paradigms,
                 including latency-sensitive transactional workloads,
                 MPI-based CPU and GPGPU-accelerated scientific
                 applications, and emerging artificial intelligence
                 workloads. We study the performance and energy
                 efficiency of ScaleSoC compared to server-class ARM
                 SoCs and discrete GPGPUs in depth. We quantify the
                 network overhead on the performance of ScaleSoC and
                 show that packing a large number of ARM cores on a
                 single chip does not necessarily guarantee better
                 performance, due to the fact that shared resources,
                 such as last-level cache, become performance
                 bottlenecks. We characterize the GPGPU accelerated
                 workloads and demonstrate that for applications that
                 can leverage the better CPU-GPGPU balance of the
                 ScaleSoC cluster, performance and energy efficiency
                 improve compared to discrete GPGPUs.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): repaired mis-encoded item markers ``(ii){\~ }'' and
%%% ``(iii){\~ }'' in the abstract (stray tilde accents left over from a
%%% lost non-breaking space); now plain spaces, matching ``(iv) '' in the
%%% same sentence.
@Article{Herbst:2018:QCP,
  author =       "Nikolas Herbst and Andr{\'e} Bauer and Samuel Kounev
                 and Giorgos Oikonomou and Erwin {Van Eyk} and George
                 Kousiouris and Athanasia Evangelinou and Rouven Krebs
                 and Tim Brecht and Cristina L. Abad and Alexandru
                 Iosup",
  title =        "Quantifying Cloud Performance and Dependability:
                 Taxonomy, Metric Design, and Emerging Challenges",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "19:1--19:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3236332",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3236332",
  abstract =     "In only a decade, cloud computing has emerged from a
                 pursuit for a service-driven information and
                 communication technology (ICT), becoming a significant
                 fraction of the ICT market. Responding to the growth of
                 the market, many alternative cloud services and their
                 underlying systems are currently vying for the
                 attention of cloud users and providers. To make
                 informed choices between competing cloud service
                 providers, permit the cost-benefit analysis of
                 cloud-based systems, and enable system DevOps to
                 evaluate and tune the performance of these complex
                 ecosystems, appropriate performance metrics,
                 benchmarks, tools, and methodologies are necessary.
                 This requires re-examining old system properties and
                 considering new system properties, possibly leading to
                 the re-design of classic benchmarking metrics such as
                 expressing performance as throughput and latency
                 (response time). In this work, we address these
                 requirements by focusing on four system properties: (i)
                 elasticity of the cloud service, to accommodate large
                 variations in the amount of service requested, (ii)
                 performance isolation between the tenants of shared
                 cloud systems and resulting performance variability,
                 (iii) availability of cloud services and systems,
                 and (iv) the operational risk of running a production
                 system in a cloud environment. Focusing on key metrics
                 for each of these properties, we review the
                 state-of-the-art, then select or propose new metrics
                 together with measurement approaches. We see the
                 presented metrics as a foundation toward upcoming,
                 future industry-standard cloud benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v3n4, article 20 (September 2018): proves the TTL
%%% approximation for LRU caches is asymptotically exact under
%%% independent stationary and ergodic request processes, with
%%% per-content results and convergence-rate bounds.
@Article{Jiang:2018:CTA,
  author =       "Bo Jiang and Philippe Nain and Don Towsley",
  title =        "On the Convergence of the {TTL} Approximation for an
                 {LRU} Cache under Independent Stationary Request
                 Processes",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "20:1--20:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3239164",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3239164",
  abstract =     "The modeling and analysis of an LRU cache is extremely
                 challenging as exact results for the main performance
                 metrics (e.g., hit rate) are either lacking or cannot
                 be used because of their high computational complexity
                 for large caches. As a result, various approximations
                 have been proposed. The state-of-the-art method is the
                 so-called TTL approximation, first proposed and shown
                 to be asymptotically exact for IRM requests by Fagin
                 [13]. It has been applied to various other workload
                 models and numerically demonstrated to be accurate but
                 without theoretical justification. In this article, we
                 provide theoretical justification for the approximation
                 in the case where distinct contents are described by
                 independent stationary and ergodic processes. We show
                 that this approximation is exact as the cache size and
                 the number of contents go to infinity. This extends
                 earlier results for the independent reference model.
                 Moreover, we establish results not only for the
                 aggregate cache hit probability but also for every
                 individual content. Last, we obtain bounds on the rate
                 of convergence.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v3n4, article 21 (September 2018): editorial back-matter item.
%%% NOTE(review): a ``List of Reviewers'' credited to a personal author is
%%% unusual for front/back matter -- verify the author field against the
%%% ACM record for DOI 10.1145/3271430.
@Article{Abid:2018:LR,
  author =       "Amine Abid",
  title =        "List of Reviewers",
  journal =      j-TOMPECS,
  volume =       "3",
  number =       "4",
  pages =        "21:1--21:??",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3271430",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:16 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3271430",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v4n1, article 1 (March 2019): backpressure-based adaptive
%%% routing that starts from shortest paths and incrementally expands
%%% routing choices as traffic load grows; throughput optimality shown
%%% by fluid analysis.
@Article{Yin:2019:ETL,
  author =       "Ping Yin and Sen Yang and Jun Xu and Jim Dai and Bill
                 Lin",
  title =        "Efficient Traffic Load-Balancing via Incremental
                 Expansion of Routing Choices",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3243173",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3243173",
  abstract =     "Routing policies play a major role in the performance
                 of communication networks. Backpressure-based adaptive
                 routing algorithms where traffic is load balanced along
                 different routing paths on a per-packet basis have been
                 studied extensively in the literature. Although
                 backpressure-based algorithms have been shown to be
                 networkwide throughput optimal, they typically have
                 poor delay performance under light or moderate loads,
                 because packets may be sent over unnecessarily long
                 routes. Further, backpressure-based algorithms have
                 required every node to compute differential backlogs
                 for every per-destination queue with the corresponding
                 per-destination queue at every adjacent node, which is
                 expensive given the large number of possible pairwise
                 differential backlogs between neighbor nodes. In this
                 article, we propose new backpressure-based adaptive
                 routing algorithms that only use shortest-path routes
                 to destinations when they are sufficient to accommodate
                 the given traffic load, but the proposed algorithms
                 will incrementally expand routing choices as needed to
                 accommodate increasing traffic loads. We show
                 analytically by means of fluid analysis that the
                 proposed algorithms retain networkwide throughput
                 optimality, and we show empirically by means of
                 simulations that our proposed algorithms provide
                 substantial improvements in delay performance. Our
                 evaluations further show that, in practice, our
                 approach dramatically reduces the number of pairwise
                 differential backlogs that have to be computed and the
                 amount of corresponding backlog information that has to
                 be exchanged, because routing choices are only
                 incrementally expanded as needed.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): rejoined the garbled acronym ``R SR'' to ``RSR'' in the
%%% abstract; the acronym is defined two sentences earlier as
%%% ``regulation service reserves (RSRs)'' and matches the title's
%%% expansion of EnergyQARE.
@Article{Chen:2019:EQA,
  author =       "Hao Chen and Yijia Zhang and Michael C. Caramanis and
                 Ayse K. Coskun",
  title =        "{EnergyQARE}: {QoS}-Aware Data Center Participation in
                 Smart Grid Regulation Service Reserve Provision",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3243172",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3243172",
  abstract =     "Power market operators have recently introduced smart
                 grid demand response (DR), in which electricity
                 consumers regulate their power usage following market
                 requirements. DR helps stabilize the grid and enables
                 integrating a larger amount of intermittent renewable
                 power generation. Data centers provide unique
                 opportunities for DR participation due to their
                 flexibility in both workload servicing and power
                 consumption. While prior studies have focused on data
                 center participation in legacy DR programs such as
                 dynamic energy pricing and peak shaving, this article
                 studies data centers in emerging DR programs, i.e.,
                 demand side capacity reserves. Among different types of
                 capacity reserves, regulation service reserves (RSRs)
                 are especially attractive due to their relatively
                 higher value. This article proposes EnergyQARE, the
                 Energy and Quality-of-Service (QoS) Aware RSR Enabler,
                 an approach that enables data center RSR provision in
                 real-life scenarios. EnergyQARE not only provides a
                 bidding strategy in RSR provision, but also contains a
                 runtime policy that adaptively modulates data center
                 power through server power management and server
                 provisioning based on workload QoS feedback. To reflect
                 real-life scenarios, this runtime policy handles a
                 heterogeneous set of jobs and considers transition time
                 delay of servers. Simulated numerical results
                 demonstrate that in a general data center scenario,
                 EnergyQARE provides close to 50\% of data center
                 average power consumption as reserves to the market and
                 saves up to 44\% in data center electricity cost, while
                 still meeting workload QoS constraints. Case studies in
                 this article show that the percentages of savings are
                 not sensitive to a specific type of non-interactive
                 workload, or the size of the data center, although they
                 depend strongly on data center utilization and
                 parameters of server power states.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): removed a stray interior space in ``( i.e.,'' in the
%%% abstract (extraction artifact).
@Article{Li:2019:QCS,
  author =       "Zhenhua Li and Yongfeng Zhang and Yunhao Liu and
                 Tianyin Xu and Ennan Zhai and Yao Liu and Xiaobo Ma and
                 Zhenyu Li",
  title =        "A Quantitative and Comparative Study of Network-Level
                 Efficiency for Cloud Storage Services",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274526",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3274526",
  abstract =     "Cloud storage services such as Dropbox and OneDrive
                 provide users with a convenient and reliable way to
                 store and share data from anywhere, on any device, and
                 at any time. Their cornerstone is the data
                 synchronization (sync) operation, which automatically
                 maps the changes in users' local file systems to the
                 cloud via a series of network communications in a
                 timely manner. Without careful design and
                 implementation, however, the data sync mechanisms could
                 generate overwhelming traffic, causing tremendous
                 financial overhead and performance penalties to both
                 service providers and end users. This article addresses
                 a simple yet critical question: Is the current data
                 sync traffic of cloud storage services efficiently
                 used? We first define a novel metric TUE to quantify
                 the Traffic Usage Efficiency of data
                 synchronization. Then, by conducting comprehensive
                 benchmark experiments and reverse engineering the data
                 sync processes of eight widely used cloud storage
                 services, we uncover their manifold practical endeavors
                 for optimizing the TUE, including three intra-file
                 approaches (compression, incremental sync, and
                 interrupted transfer resumption), two cross-file/-user
                 approaches (i.e., deduplication and peer-assisted
                 offloading), two batching approaches (file bundling and
                 sync deferment), and two web-specific approaches
                 (thumbnail views and dynamic content loading). Our
                 measurement results reveal that a considerable portion
                 of the data sync traffic is, in a sense, wasteful and
                 can be effectively avoided or significantly reduced via
                 carefully designed data sync mechanisms. Most
                 importantly, our study not only offers practical,
                 actionable guidance for providers to build more
                 efficient, traffic-economic services, but also helps
                 end users pick appropriate services that best fit their
                 use cases and budgets.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v4n1, article 4 (March 2019): content replication for VoD
%%% systems with time-correlated (recommendation-driven) per-user request
%%% processes modeled as Markov chains; shows adaptive joint
%%% estimation-and-replication beats separated policies.
@Article{Gupta:2019:ERS,
  author =       "Samarth Gupta and Sharayu Moharir",
  title =        "Effect of Recommendations on Serving Content with
                 Unknown Demand",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3289324",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3289324",
  abstract =     "We consider the task of content replication in
                 distributed content delivery systems used by
                 Video-on-Demand (VoD) services with large content
                 catalogs. The prior work in this area focuses on the
                 setting where each request is generated independent of
                 all past requests. Motivated by the fact that most
                 popular VoD services offer recommendations to users
                 based on their viewing history, in a departure from
                 existing studies, we study the setting with
                 time-correlation in requests coming from each user. We
                 use a Markovian process to model each user's request
                 process. In addition to introducing time-correlation in
                 user requests, our model is consistent with empirically
                 observed properties of the request process for VoD
                 services with recommendation engines. In the setting
                 where the underlying Markov Chain is unknown and has to
                 be learned from the very requests the system is trying
                 to serve, we show that separating the task of
                 estimating content popularity and using the estimates
                 to design a static content replication policy is
                 strictly sub-optimal. To prove this, we show that an
                 adaptive policy, which jointly performs the task of
                 estimation and content replication, outperforms all
                 policies that separate the task of estimation and
                 content replication.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): reconstructed the garbled ``only K \N of which'' in the
%%% abstract as ``only $K < N$ of which'' (the sentence describes sharding
%%% into N fragments with K required for reconstruction); confirm exact
%%% wording against the ACM record for DOI 10.1145/3300143.
@Article{Harrison:2019:MRT,
  author =       "P. G. Harrison and N. M. Patel and J. F. P{\'e}rez and
                 Z. Qiu",
  title =        "Managing Response Time Tails by Sharding",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "5:1--5:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3300143",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3300143",
  abstract =     "Matrix analytic methods are developed to compute the
                 probability distribution of response times (i.e., data
                 access times) in distributed storage systems protected
                 by erasure coding, which is implemented by sharding a
                 data object into N fragments, only $K < N$ of which are
                 required to reconstruct the object. This leads to a
                 partial-fork-join model with a choice of canceling
                 policies for the redundant N - K tasks. The accuracy of
                 the analytical model is supported by tests against
                 simulation in a broad range of setups. At increasing
                 workload intensities, numerical results show the extent
                 to which increasing the redundancy level reduces the
                 mean response time of storage reads and significantly
                 flattens the tail of their distribution; this is
                 demonstrated at medium-high quantiles, up to the 99th.
                 The quantitative reduction in response time achieved by
                 two policies for canceling redundant tasks is also
                 shown: for cancel-at-finish and cancel-at-start, which
                 limits the additional load introduced whilst losing the
                 benefit of selectivity amongst fragment service
                 times.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v4n1, article 6 (March 2019): large-deviations-based tail
%%% bounds on steady-state waiting times in Fork-Join queuing systems
%%% with output synchronization, plus adaptive provisioning driven by
%%% arrival intensity.
@Article{KhudaBukhsh:2019:PPE,
  author =       "Wasiur R. KhudaBukhsh and Sounak Kar and Amr Rizk and
                 Heinz Koeppl",
  title =        "Provisioning and Performance Evaluation of Parallel
                 Systems with Output Synchronization",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "1",
  pages =        "6:1--6:??",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3300142",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3300142",
  abstract =     "Parallel server frameworks are widely deployed in
                 modern large-data processing applications. Intuitively,
                 splitting and parallel processing of the workload
                 provides accelerated application response times and
                 scaling flexibility. Examples of such frameworks
                 include MapReduce, Hadoop, and Spark. For many
                 applications, the dynamics of such systems are
                 naturally captured by a Fork-Join (FJ) queuing model,
                 where incoming jobs are split into tasks each of which
                 is mapped to exactly one server. When all the tasks
                 that belong to one job are executed, the job is
                 reassembled and leaves the system. We consider this
                 behavior at the output as a synchronization constraint.
                 In this article, we study the performance of such
                 parallel systems for different server properties, e.g.,
                 work-conservingness, phase-type behavior, and as
                 suggested by recent evidence, for bursty input job
                 arrivals. We establish a Large Deviations Principle for
                 the steady-state job waiting times in an FJ system
                 based on Markov-additive processes. Building on that,
                 we present a performance analysis framework for FJ
                 systems and provide computable bounds on the tail
                 probabilities of the steady-state waiting times. We
                 validate our bounds using estimates obtained through
                 simulations. In addition, we define and analyze
                 provisioning, a flexible division of jobs into tasks,
                 in FJ systems. Finally, we use this framework together
                 with real-world traces to show the benefits of an
                 adaptive provisioning system that adjusts the service
                 within an FJ system based on the arrival intensity.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% TOMPECS v4n2, article 7 (June 2019): analysis of the latency--cost
%%% tradeoff of replicating straggling tasks in large-scale parallel
%%% (cloud) computing, with an estimation algorithm evaluated on the
%%% Google Cluster Trace.
@Article{Wang:2019:ESR,
  author =       "Da Wang and Gauri Joshi and Gregory W. Wornell",
  title =        "Efficient Straggler Replication in Large-Scale
                 Parallel Computing",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3310336",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3310336",
  abstract =     "In a cloud computing job with many parallel tasks, the
                 tasks on the slowest machines (straggling tasks) become
                 the bottleneck in the job completion. Computing
                 frameworks such as MapReduce and Spark tackle this by
                 replicating the straggling tasks and waiting for any
                 one copy to finish. Despite being adopted in practice,
                 there is little analysis of how replication affects the
                 latency and the cost of additional computing resources.
                 In this article, we provide a framework to analyze this
                 latency-cost tradeoff and find the best replication
                 strategy by answering design questions, such as (1)
                 when to replicate straggling tasks, (2) how many
                 replicas to launch, and (3) whether to kill the
                 original copy or not. Our analysis reveals that for
                 certain execution time distributions, a small amount of
                 task replication can drastically reduce both latency
                 and the cost of computing resources. We also propose an
                 algorithm to estimate the latency and cost based on the
                 empirical distribution of task execution time.
                 Evaluations using samples in the Google Cluster Trace
                 suggest further latency and cost reduction compared to
                 the existing replication strategy used in MapReduce.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("8:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Izadpanah:2019:PAP,
  author =       "Ramin Izadpanah and Benjamin A. Allan and Damian
                 Dechev and Jim Brandt",
  title =        "Production Application Performance Data Streaming for
                 System Monitoring",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319498",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3319498",
  abstract =     "In this article, we present an approach to streaming
                 collection of application performance data. Practical
                 application performance tuning and troubleshooting in
                 production high-performance computing (HPC)
                 environments requires an understanding of how
                 applications interact with the platform, including (but
                 not limited to) parallel programming libraries such as
                 Message Passing Interface (MPI). Several profiling and
                 tracing tools exist that collect heavy runtime data
                 traces either in memory (released only at application
                 exit) or on a file system (imposing an I/O load that
                 may interfere with the performance being measured).
                 Although these approaches are beneficial in development
                 stages and post-run analysis, a systemwide and
                 low-overhead method is required to monitor deployed
                 applications continuously. This method must be able to
                 collect information at both the application and system
                 levels to yield a complete performance picture. In our
                 approach, an application profiler collects application
                 event counters. A sampler uses an efficient
                 inter-process communication method to periodically
                 extract the application counters and stream them into
                 an infrastructure for performance data collection. We
                 implement a tool-set based on our approach and
                 integrate it with the Lightweight Distributed Metric
                 Service (LDMS) system, a monitoring system used on
                 large-scale computational platforms. LDMS provides the
                 infrastructure to create and gather streams of
                 performance data in a low overhead manner. We
                 demonstrate our approach using applications implemented
                 with MPI, as it is one of the most common standards for
                 the development of large-scale scientific applications.
                 We utilize our tool-set to study the impact of our
                 approach on an open source HPC application, Nalu. Our
                 tool-set enables us to efficiently identify patterns in
                 the behavior of the application without source-level
                 knowledge. We leverage LDMS to collect system-level
                 performance data and explore the correlation between
                 the system and application events. Also, we demonstrate
                 how our tool-set can help detect anomalies with a low
                 latency. We run tests on two different architectures: a
                 system enabled with Intel Xeon Phi and another system
                 equipped with Intel Xeon processor. Our overhead study
                 shows our method imposes at most 0.5\% CPU usage
                 overhead on the application in realistic deployment
                 scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): title math braced as {$d$} to protect it from .bst
%%% case-changing, matching this file's convention (cf. {$N$}-Tier in
%%% Wang:2019:OTA).
@Article{Hellemans:2019:PRD,
  author =       "Tim Hellemans and Benny {Van Houdt}",
  title =        "Performance of Redundancy({$d$}) with Identical\slash
                 Independent Replicas",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3316768",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3316768",
  abstract =     "Queueing systems with redundancy have received
                 considerable attention recently. The idea of redundancy
                 is to reduce latency by replicating each incoming job a
                 number of times and to assign these replicas to a set
                 of randomly selected servers. As soon as one replica
                 completes service the remaining replicas are cancelled.
                 Most prior work on queueing systems with redundancy
                 assumes that the job durations of the different
                 replicas are independent and identically distributed
                 (i.i.d.), which yields insights that can be misleading
                 for computer system design. In this article, we develop
                 a differential equation, using the cavity method, to
                 assess the workload and response time distribution in a
                 large homogeneous system with redundancy without the
                 need to rely on this independence assumption. More
                 specifically, we assume that the duration of each
                 replica of a single job is identical across the servers
                 and follows a general service time distribution.
                 Simulation results suggest that the differential
                 equation yields exact results as the system size tends
                 to infinity and can be used to study the stability of
                 the system. We also compare our system to the one with
                 i.i.d. replicas and show the similarity in the analysis
                 used for independent, respectively, identical
                 replicas.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("10:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Wang:2019:OTA,
  author =       "Qingyang Wang and Shungeng Zhang and Yasuhiko Kanemasa
                 and Calton Pu and Balaji Palanisamy and Lilian Harada
                 and Motoyuki Kawaba",
  title =        "Optimizing {$N$}-Tier Application Scalability in the
                 Cloud: A Study of Soft Resource Allocation",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3326120",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3326120",
  abstract =     "An effective cloud computing environment requires both
                 good performance and high efficiency of computing
                 resources. Through extensive experiments using a
                 representative n-tier benchmark application (Rice
                 University Bulletin Board System (RUBBoS)), we show
                 that the soft resource allocation (e.g., thread pool
                 size and database connection pool size) in component
                 servers has a significant impact on the overall system
                 performance, especially at high system utilization
                 scenarios. Concretely, the same software resource
                 allocation can be a good setting in one hardware
                 configuration and then becomes an either under- or
                 over-allocation in a slightly different hardware
                 configuration, causing a significant performance drop.
                 We have also observed some interesting phenomena that
                 were caused by the non-trivial dependencies between the
                 soft resources of servers in different tiers. For
                 instance, the thread pool size in an Apache web server
                 can limit the total number of concurrent requests to
                 the downstream servers, which surprisingly decreases
                 the Central Processing Unit (CPU) utilization of the
                 Clustered Java Database Connectivity (C-JDBC)
                 clustering middleware as the workload increases. To
                 provide a globally optimal (or near-optimal) soft
                 resource allocation of each tier in the system, we
                 propose a practical iterative solution approach by
                 combining a soft resource aware queuing network model
                 and the fine-grained measurement data of every
                 component server. Our results show that to truly scale
                 complex distributed systems such as n-tier web
                 applications with expected performance in the cloud, we
                 need to carefully manage soft resource allocation in
                 the system.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("11:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Namazi:2019:SSO,
  author =       "Alireza Namazi and Saeed Safari and Siamak Mohammadi
                 and Meisam Abdollahi",
  title =        "{SORT}: Semi Online Reliable Task Mapping for Embedded
                 Multi-Core Systems",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3322899",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3322899",
  abstract =     "This article proposes a Semi Online Reliable Task
                 (SORT) mapping approach to many-core platforms divided
                 into two sections: offline and online. The offline
                 section is a twofolded approach. It maintains the
                 reliability of the mapped task graph against soft
                 errors considering the reliability threshold defined by
                 designers. As wear-out mechanisms decrease the lifetime
                 of the system, our proposed approach increases the
                 lifetime of the system using task migration scenarios.
                 It specifies task migration plans with the minimum
                 overhead using a novel heuristic approach. SORT
                 maintains the required level of reliability of the task
                 graph in the whole lifetime of the system using a
                 replication technique with minimum replica overhead,
                 maximum achievable performance, and minimum temperature
                 increase. The online segment uses migration plans
                 obtained in the offline segment to increase the
                 lifetime and also permanently maintains the reliability
                 threshold for the task graph during runtime. Results
                 show that the effectiveness of SORT improves on bigger
                 mesh sizes and higher reliability thresholds.
                 Simulation results obtained from real benchmarks show
                 that the proposed approach decreases design-time
                 calculation up to 4,371\% compared to exhaustive
                 exploration while achieving a lifetime negligibly lower
                 than the exhaustive solution (up to 5.83\%).",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("12:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Nguyen:2019:PFR,
  author =       "T. T. Hang Nguyen and Olivier Brun and Balakrishna J.
                 Prabhu",
  title =        "Performance of a Fixed Reward Incentive Scheme for
                 Two-hop {DTNs} with Competing Relays",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325288",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3325288",
  abstract =     "We analyze the performance of an incentive scheme for
                 two-hop Delay-Tolerant Networks (DTNs) in which a
                 backlogged source proposes a fixed reward to the relays
                 to deliver a message. Only one message at a time is
                 proposed by the source. For a given message, only the
                 first relay to deliver it gets the reward corresponding
                 to this message thereby inducing a competition between
                 the relays. The relays seek to maximize the expected
                 reward for each message, whereas the objective of the
                 source is to satisfy a given constraint on the
                 probability of message delivery. We show that the
                 optimal policy of a relay is of threshold type: it
                 accepts a message until a first threshold and then
                 keeps the message until it either meets the destination
                 or reaches the second threshold. Formulas for computing
                 the thresholds as well as probability of message
                 delivery are derived for a backlogged source.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): title math braced as {$d$} to protect it from .bst
%%% case-changing (matching {$N$}-Tier in Wang:2019:OTA); abstract's
%%% extraction artifacts "d -choices" / "d = 10" restored to math mode.
@Article{Verschoren:2019:EDC,
  author =       "Robin Verschoren and Benny {Van Houdt}",
  title =        "On the Endurance of the {$d$}-Choices Garbage Collection
                 Algorithm for Flash-Based {SSDs}",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3326121",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3326121",
  abstract =     "Garbage collection (GC) algorithms for flash-based
                 solid-state drives (SSDs) have a profound impact on its
                 performance and many studies have focused on assessing
                 the so-called write amplification of various GC
                 algorithms. In this article, we consider the family of
                 $d$-choices GC algorithms and study (a) the extent in
                 which these algorithms induce unequal wear and (b) the
                 manner in which they affect the lifetime of the drive.
                 For this purpose, we introduce two performance
                 measures: PE fairness and SSD endurance. We study the
                 impact of the $d$-choices GC algorithm on both these
                 measures under different workloads (uniform, synthetic
                 and trace-based) when combined with two different write
                 modes. Numerical results show that the more complex of
                 the two write modes, which requires hot/cold data
                 identification, may not necessarily give rise to a
                 significantly better SSD endurance. Further, the
                 $d$-choices GC algorithm is often shown to strike a good
                 balance between garbage collection and wear leveling
                 for small $d$ values (e.g., $d = 10$), yielding high
                 endurance.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("14:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata. "Fake
%%% Positive" in the abstract appears to quote the paper's own metric name
%%% --- verify against the published abstract before "correcting" it.
@Article{Phan:2019:NFE,
  author =       "Tien-Dat Phan and Guillaume Pallez and Shadi Ibrahim
                 and Padma Raghavan",
  title =        "A New Framework for Evaluating Straggler Detection
                 Mechanisms in {MapReduce}",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3328740",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3328740",
  abstract =     "Big Data systems (e.g., Google MapReduce, Apache
                 Hadoop, Apache Spark) rely increasingly on speculative
                 execution to mask slow tasks, also known as stragglers,
                 because a job's execution time is dominated by the
                 slowest task instance. Big Data systems typically
                 identify stragglers and speculatively run copies of
                 those tasks with the expectation that a copy may
                 complete faster to shorten job execution times. There
                 is a rich body of recent results on straggler
                 mitigation in MapReduce. However, the majority of these
                 do not consider the problem of accurately detecting
                 stragglers. Instead, they adopt a particular straggler
                 detection approach and then study its effectiveness in
                 terms of performance, e.g., reduction in job completion
                 time or higher efficiency, e.g., high resource
                 utilization. In this article, we consider a complete
                 framework for straggler detection and mitigation. We
                 start with a set of metrics that can be used to
                 characterize and detect stragglers including Precision,
                 Recall, Detection Latency, Undetected Time, and Fake
                 Positive. We then develop an architectural model by
                 which these metrics can be linked to measures of
                 performance including execution time and system energy
                 overheads. We further conduct a series of experiments
                 to demonstrate which metrics and approaches are more
                 effective in detecting stragglers and are also
                 predictive of effectiveness in terms of performance and
                 energy efficiencies. For example, our results indicate
                 that the default Hadoop straggler detector could be
                 made more effective. In a certain case, Precision is
                 low and only 55\% of those detected are actual
                 stragglers and the Recall, i.e., percent of actual
                 detected stragglers, is also relatively low at 56\%.
                 For the same case, the hierarchical approach (i.e., a
                 green-driven detector based on the default one)
                 achieves a Precision of 99\% and a Recall of 29\%. This
                 increase in Precision can be translated to achieve
                 lower execution time and energy consumption, and thus
                 higher performance and energy efficiency; compared to
                 the default Hadoop mechanism, the energy consumption is
                 reduced by almost 31\%. These results demonstrate how
                 our framework can offer useful insights and be applied
                 in practical settings to characterize and design new
                 straggler detection mechanisms for MapReduce systems.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("15:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Lim:2019:PCI,
  author =       "Chiun Lin Lim and Ki Suh Lee and Han Wang and Hakim
                 Weatherspoon and Ao Tang",
  title =        "Packet Clustering Introduced by Routers: Modeling,
                 Analysis, and Experiments",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "3",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3345032",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3345032",
  abstract =     "In this article, we investigate a router's inherent
                 variation on packet processing time and its effect on
                 interpacket delay and packet clustering. We propose a
                 simple pipeline model incorporating the inherent
                 variation, and two metrics-one to measure packet
                 clustering and one to quantify inherent variation. To
                 isolate the effect of the inherent variation, we begin
                 our analysis with no cross traffic and step through
                 setups where the input streams have different data
                 rates, packet size, and go through a different number
                 of hops. We show that a homogeneous input stream with a
                 sufficiently large interpacket gap will emerge at the
                 router's output with interpacket delays that are
                 negative correlated with adjacent values and have
                 symmetrical distributions. We show that for smaller
                 interpacket gaps, the change in packet clustering is
                 smaller. It is also shown that the degree of packet
                 clustering could in fact decrease for a clustered
                 input. We generalize our results by adding cross
                 traffic. All the results predicted by the model are
                 validated with experiments with real routers. We also
                 investigated several factors that can affect the
                 inherent variation as well as some potential
                 applications of this study.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("16:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata.
@Article{Bakhshaliyev:2019:ICI,
  author =       "Khalid Bakhshaliyev and Muhammed Abdullah Canbaz and
                 Mehmet Hadi Gunes",
  title =        "Investigating Characteristics of {Internet} Paths",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "3",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3342286",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3342286",
  abstract =     "Interactive and multimedia applications depend on the
                 stability of end-to-end paths for predictable
                 performance and good quality of service. On the other
                 hand, network providers depend on multiple paths to
                 ensure fault tolerance and use load balancing between
                 these paths to enhance the overall network throughput.
                 In this study, we analyze path dynamics for both
                 end-to-end paths and path segments within service
                 providers' networks using 2 months of measurement data
                 from the RIPE Atlas platform, which collects path
                 traces between a fixed set of source and destination
                 pairs every 15 minutes. We observe that 78\% of the
                 end-to-end routes have at least two alternative
                 Autonomous System (AS) paths with some end-to-end
                 routes going through hundreds of different AS paths
                 during the 2 months of analysis. While AS level paths
                 are often prevalent for a day, there are considerable
                 changes in the routing of the trace packets over the
                 ASes for a longer duration of a month or longer.
                 Analyzing end-to-end paths for routing anomalies, we
                 observe that 4.4\% of the path traces (involving 18\%
                 of the ASes) contain routing loops indicating
                 misconfiguration of routers. Some of the ASes had over
                 100 routers involved in loops in path traces through
                 their networks. We observe a much higher rate of
                 anomalies in the AS level, with 45\% of path traces
                 containing an AS loop. Additionally, we discovered that
                 few of the ASes bounce-back packets where some traces
                 through their network traverse routers in both forward
                 and backward directions. Determining path segments
                 belonging to each AS, we further explore ingress to
                 egress paths of ASes in addition to the source to
                 destination paths within the AS. Analyzing trace
                 segments between ingresses and egresses of an AS, we
                 realized more than half of the ASes have the same
                 router level path between any ingress-egress pair for
                 the 2 months, but others implement load balancing.
                 These results are different from earlier studies that
                 indicated a high level of path dynamism. Our results
                 indicate that the end-to-end path dynamism is due to
                 the Border Gateway Protocol level rather than the
                 router level within ASes.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

%%% NOTE(review): ending page ("17:??") and CODEN ("????") are placeholders;
%%% update when ACM publishes final pagination/CODEN metadata. Lowercase
%%% "using" in the title may mirror the published title --- verify before
%%% normalizing the capitalization.
@Article{Skevakis:2019:SOF,
  author =       "Emmanouil Skevakis and Ioannis Lambadaris and Hassan
                 Halabian",
  title =        "Scheduling for Optimal File-Transfer Delay using
                 Chunked Random Linear Network Coding Broadcast",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "3",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3340242",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3340242",
  abstract =     "We study the broadcast transmission of a single file
                 to an arbitrary number of receivers using Random Linear
                 Network Coding (RLNC) in a network with unreliable
                 channels. Due to the increased computational complexity
                 of the decoding process (especially for large files),
                 we apply chunked RLNC (i.e., RLNC is applied within
                 non-overlapping subsets of the file). In our work, we
                 show the optimality of the Least Received (LR) batch
                 scheduling policy with regards to the expected file
                 transfer completion time. The LR policy strives to keep
                 the receiver queues balanced. This is done by
                 transmitting packets (corresponding to encoded batches)
                 that are needed by the receivers with the shortest
                 queues of successfully received packets. Furthermore,
                 we provide formulas for the expected time for the file
                 transmission to all receivers using the LR batch
                 scheduling policy and the minimum achievable coding
                 window size in the case of a pre-defined delay
                 constraint. Moreover, we evaluate through simulations a
                 modification of the LR policy in a more realistic
                 system setting with reduced feedback from the
                 receivers. Finally, we provide an initial analysis and
                 further modifications to the LR policy for
                 time-correlated channels and asymmetric channels.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}

@article{Friedlander:2019:GLC,
  author =      {Friedlander, Eric and Aggarwal, Vaneet},
  title =       {Generalization of {LRU} Cache Replacement Policy with
                 Applications to Video Streaming},
  journal =     j-TOMPECS,
  volume =      {4},
  number =      {3},
  pages =       {18:1--18:??},
  month =       sep,
  year =        {2019},
  CODEN =       {????},
  DOI =         {https://doi.org/10.1145/3345022},
  ISSN =        {2376-3639},
  bibdate =     {Sat Sep 21 07:21:17 MDT 2019},
  bibsource =   {http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  URL =         {https://dl.acm.org/citation.cfm?id=3345022},
  abstract =    {Caching plays a crucial role in networking systems to
                 reduce the load on the network and is commonly employed
                 by content delivery networks (CDNs) to improve
                 performance. One of the commonly used mechanisms, Least
                 Recently Used (LRU), works well for identical file
                 sizes. However, for asymmetric file sizes, the
                 performance deteriorates. This article proposes an
                 adaptation to the LRU strategy, called gLRU, where the
                 file is sub-divided into equal-sized chunks. In this
                 strategy, a chunk of the newly requested file is added
                 in the cache, and a chunk of the least-recently-used
                 file is removed from the cache. Even though approximate
                 analysis for the hit rate has been studied for LRU, the
                 analysis does not extend to gLRU, since the metric of
                 interest is no longer the hit rate as the cache has
                 partial files. This article provides a novel
                 approximation analysis for this policy where the cache
                 may have partial file contents. The approximation
                 approach is validated by simulations. Further, gLRU
                 outperforms the LRU strategy for a Zipf file popularity
                 distribution and censored Pareto file size distribution
                 for the file download times. Video streaming
                 applications can further use the partial cache contents
                 to help the stall duration significantly, and the
                 numerical results indicate significant improvements
                 (32\%) in stall duration using the gLRU strategy as
                 compared to the LRU strategy. Furthermore, the gLRU
                 replacement policy compares favorably to two other
                 cache replacement policies when simulated on MSR
                 Cambridge Traces obtained from the SNIA IOTTA
                 repository.},
  acknowledgement = ack-nhfb,
  articleno =   {18},
  fjournal =    {ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)},
  journal-URL = {http://dl.acm.org/pub.cfm?id=J1525},
}